diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a7c0b1baf5b4703a3eb487e8c5f6033586ab7d57 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +--- +tags: +- espnet +- audio +- automatic-speech-recognition +- speech-translation +language: multilingual +datasets: +- owsm_v3.1 +license: cc-by-4.0 +--- + +## OWSM: Open Whisper-style Speech Model + +[OWSM](https://arxiv.org/abs/2309.13876) is an Open Whisper-style Speech Model from [CMU WAVLab](https://www.wavlab.org/). It reproduces Whisper-style training using publicly available data and an open-source toolkit [ESPnet](https://github.com/espnet/espnet). + +Our demo is available [here](https://huggingface.co/spaces/pyf98/OWSM_v3_demo). The [project page](https://www.wavlab.org/activities/2024/owsm/) contains various resources. + +**[OWSM v3.1](https://arxiv.org/abs/2401.16658) is an improved version of OWSM v3. It significantly outperforms OWSM v3 in almost all evaluation benchmarks.** +We do not include any new training data. Instead, we utilize a state-of-the-art speech encoder, [E-Branchformer](https://arxiv.org/abs/2210.00077). + +This is a small-sized model with 367M parameters. It is trained on 180k hours of public speech data. 
Specifically, it supports the following speech-to-text tasks: +- Speech recognition +- Any-to-any-language speech translation +- Utterance-level alignment +- Long-form transcription +- Language identification + + +### Citing OWSM, Branchformers and ESPnet + +```BibTex +@misc{peng2024owsm, + title={{OWSM} v3.1: Better and Faster Open {Whisper}-Style Speech Models based on {E-Branchformer}}, + author={Yifan Peng and Jinchuan Tian and William Chen and Siddhant Arora and Brian Yan and Yui Sudo and Muhammad Shakeel and Kwanghee Choi and Jiatong Shi and Xuankai Chang and Jee-weon Jung and Shinji Watanabe}, + year={2024}, + eprint={2401.16658}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +@inproceedings{owsm-asru23, + author={Peng, Yifan and Tian, Jinchuan and Yan, Brian and Berrebbi, Dan and Chang, Xuankai and Li, Xinjian and Shi, Jiatong and Arora, Siddhant and Chen, William and Sharma, Roshan and Zhang, Wangyou and Sudo, Yui and Shakeel, Muhammad and Jung, Jee-Weon and Maiti, Soumi and Watanabe, Shinji}, + booktitle={2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)}, + title={Reproducing {Whisper}-Style Training Using An Open-Source Toolkit And Publicly Available Data}, + year={2023}, + doi={10.1109/ASRU57964.2023.10389676} +} +@inproceedings{peng23b_interspeech, + author={Yifan Peng and Kwangyoun Kim and Felix Wu and Brian Yan and Siddhant Arora and William Chen and Jiyang Tang and Suwon Shon and Prashant Sridhar and Shinji Watanabe}, + title={A Comparative Study on {E-Branchformer} vs {Conformer} in Speech Recognition, Translation, and Understanding Tasks}, + year={2023}, + booktitle={Proc. 
INTERSPEECH 2023}, + pages={2208--2212}, + doi={10.21437/Interspeech.2023-1194} +} +@inproceedings{kim2023branchformer, + title={{E-Branchformer}: {Branchformer} with Enhanced Merging for Speech Recognition}, + author={Kim, Kwangyoun and Wu, Felix and Peng, Yifan and Pan, Jing and Sridhar, Prashant and Han, Kyu J. and Watanabe, Shinji}, + booktitle={2022 IEEE Spoken Language Technology Workshop (SLT)}, + pages={84--91}, + year={2023}, + organization={IEEE} +} +@inproceedings{pmlr-v162-peng22a, + title = {{Branchformer}: Parallel {MLP}-Attention Architectures to Capture Local and Global Context for Speech Recognition and Understanding}, + author = {Peng, Yifan and Dalmia, Siddharth and Lane, Ian and Watanabe, Shinji}, + booktitle = {Proceedings of the 39th International Conference on Machine Learning}, + pages = {17627--17643}, + year = {2022}, + editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan}, + volume = {162}, + series = {Proceedings of Machine Learning Research}, + month = jul, + publisher = {PMLR}, + pdf = {https://proceedings.mlr.press/v162/peng22a/peng22a.pdf}, + url = {https://proceedings.mlr.press/v162/peng22a.html}, + abstract = {Conformer has proven to be effective in many speech processing tasks. It combines the benefits of extracting local dependencies using convolutions and global dependencies using self-attention. Inspired by this, we propose a more flexible, interpretable and customizable encoder alternative, Branchformer, with parallel branches for modeling various ranged dependencies in end-to-end speech processing. In each encoder layer, one branch employs self-attention or its variant to capture long-range dependencies, while the other branch utilizes an MLP module with convolutional gating (cgMLP) to extract local relationships. We conduct experiments on several speech recognition and spoken language understanding benchmarks. 
Results show that our model outperforms both Transformer and cgMLP. It also matches with or outperforms state-of-the-art results achieved by Conformer. Furthermore, we show various strategies to reduce computation thanks to the two-branch architecture, including the ability to have variable inference complexity in a single trained model. The weights learned for merging branches indicate how local and global dependencies are utilized in different layers, which benefits model designing.} +} +@inproceedings{watanabe2018espnet, + author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, + title={{ESPnet}: End-to-End Speech Processing Toolkit}, + year={2018}, + booktitle={Proceedings of Interspeech}, + pages={2207--2211}, + doi={10.21437/Interspeech.2018-1456}, + url={http://dx.doi.org/10.21437/Interspeech.2018-1456} +} +``` diff --git a/data/token_list/bpe_unigram50000/bpe.model b/data/token_list/bpe_unigram50000/bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..3f386604ac50541de0d4500913350e158dbce5b8 --- /dev/null +++ b/data/token_list/bpe_unigram50000/bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d6327da127e870bcb8c737dceb3bd47ccbce63da74ddb094f64afe313d68c8c +size 1041297 diff --git a/exp/s2t_stats_raw_bpe50000/train/feats_stats.npz b/exp/s2t_stats_raw_bpe50000/train/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..4ef154c285deb458cb537da751909790aea294d2 --- /dev/null +++ b/exp/s2t_stats_raw_bpe50000/train/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef4b5e465110edf32eec024cf2427eedd677f5733bb87d6b2131e6984a6e13f +size 1402 diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/RESULTS.md 
b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/RESULTS.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38fb582b58b61c9ae44380b843722473a91c513d --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml @@ -0,0 +1,50271 @@ +config: conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml +print_config: false +log_level: INFO +drop_last_iter: false +dry_run: false +iterator_type: sequence +valid_iterator_type: null +output_dir: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 +ngpu: 1 +seed: 42 +num_workers: 4 +num_att_plot: 0 +dist_backend: nccl +dist_init_method: file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_01501f2e-ee88-4157-a837-1cdd946f45c6 +dist_world_size: 16 +dist_rank: 0 +local_rank: 0 +dist_master_addr: null +dist_master_port: null +dist_launcher: slurm +multiprocessing_distributed: true +unused_parameters: false +sharded_ddp: false +cudnn_enabled: true +cudnn_benchmark: false +cudnn_deterministic: true +collect_stats: false +write_collected_feats: false +max_epoch: 45 +patience: null +val_scheduler_criterion: +- valid +- loss +early_stopping_criterion: +- valid +- loss +- min +best_model_criterion: +- - valid + - acc + - max +- - valid + - total_count + - max +keep_nbest_models: 5 +nbest_averaging_interval: 0 +grad_clip: 5.0 +grad_clip_type: 2.0 +grad_noise: false +accum_grad: 1 
+no_forward_run: false +resume: true +train_dtype: float32 +use_amp: true +log_interval: null +use_matplotlib: true +use_tensorboard: true +create_graph_in_tensorboard: false +use_wandb: false +wandb_project: null +wandb_id: null +wandb_entity: null +wandb_name: null +wandb_model_log_interval: -1 +detect_anomaly: false +use_lora: false +save_lora_only: true +lora_conf: {} +pretrain_path: null +init_param: [] +ignore_init_mismatch: false +freeze_param: [] +num_iters_per_epoch: 15000 +batch_size: 256 +valid_batch_size: null +batch_bins: 1000000 +valid_batch_bins: null +train_shape_file: +- exp/s2t_stats_raw_bpe50000/splits12/speech_shape +- exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe +- exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe +- exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe +valid_shape_file: +- exp/s2t_stats_raw_bpe50000/valid/speech_shape +- exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe +- exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe +- exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe +batch_type: unsorted +valid_batch_type: null +fold_length: +- 80000 +- 150 +- 150 +- 150 +sort_in_batch: descending +shuffle_within_batch: false +sort_batch: descending +multiple_iterator: true +chunk_length: 500 +chunk_shift_ratio: 0.5 +num_cache_chunks: 1024 +chunk_excluded_key_prefixes: [] +chunk_default_fs: null +train_data_path_and_name_and_type: +- - exp/s2t_stats_raw_bpe50000/splits12/wav.scp + - speech + - kaldi_ark +- - exp/s2t_stats_raw_bpe50000/splits12/text.prev + - text_prev + - text +- - exp/s2t_stats_raw_bpe50000/splits12/text.ctc + - text_ctc + - text +- - exp/s2t_stats_raw_bpe50000/splits12/text + - text + - text +valid_data_path_and_name_and_type: +- - dump/raw/dev_v3/wav.scp + - speech + - kaldi_ark +- - dump/raw/dev_v3/text.prev + - text_prev + - text +- - dump/raw/dev_v3/text.ctc + - text_ctc + - text +- - dump/raw/dev_v3/text + - text + - text +allow_variable_data_keys: false +max_cache_size: 0.0 +max_cache_fd: 32 
+allow_multi_rates: false +valid_max_cache_size: null +exclude_weight_decay: false +exclude_weight_decay_conf: {} +optim: adamw +optim_conf: + lr: 0.0005 + betas: + - 0.9 + - 0.98 + eps: 1.0e-06 + weight_decay: 0.0 +scheduler: piecewiselinearwarmuplr +scheduler_conf: + warmup_steps_list: + - 0 + - 30000 + - 60000 + warmup_lr_list: + - 0.0 + - 5.0e-05 + - 0.0005 +token_list: +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +-
+- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- <0.00> +- <0.02> +- <0.04> +- <0.06> +- <0.08> +- <0.10> +- <0.12> +- <0.14> +- <0.16> +- <0.18> +- <0.20> +- <0.22> +- <0.24> +- <0.26> +- <0.28> +- <0.30> +- <0.32> +- <0.34> +- <0.36> +- <0.38> +- <0.40> +- <0.42> +- <0.44> +- <0.46> +- <0.48> +- <0.50> +- <0.52> +- <0.54> +- <0.56> +- <0.58> +- <0.60> +- <0.62> +- <0.64> +- <0.66> +- <0.68> +- <0.70> +- <0.72> +- <0.74> +- <0.76> +- <0.78> +- <0.80> +- <0.82> +- <0.84> +- <0.86> +- <0.88> +- <0.90> +- <0.92> +- <0.94> +- <0.96> +- <0.98> +- <1.00> +- <1.02> +- <1.04> +- <1.06> +- <1.08> +- <1.10> +- <1.12> +- <1.14> +- <1.16> +- <1.18> +- <1.20> +- <1.22> +- <1.24> +- <1.26> +- <1.28> +- <1.30> +- <1.32> +- <1.34> +- <1.36> +- <1.38> +- <1.40> +- <1.42> +- <1.44> +- <1.46> +- <1.48> +- <1.50> +- <1.52> +- <1.54> +- <1.56> +- <1.58> +- <1.60> +- <1.62> +- <1.64> +- <1.66> +- <1.68> +- <1.70> +- <1.72> +- <1.74> +- <1.76> +- <1.78> +- <1.80> +- <1.82> +- <1.84> +- <1.86> +- <1.88> +- <1.90> +- <1.92> +- <1.94> +- <1.96> +- <1.98> +- <2.00> +- <2.02> +- <2.04> +- <2.06> +- <2.08> +- <2.10> +- <2.12> +- <2.14> +- <2.16> +- <2.18> +- <2.20> +- <2.22> +- <2.24> +- <2.26> +- <2.28> +- <2.30> +- <2.32> +- <2.34> +- <2.36> +- <2.38> +- <2.40> +- <2.42> +- <2.44> +- <2.46> +- <2.48> +- <2.50> +- <2.52> +- <2.54> +- <2.56> +- <2.58> +- <2.60> +- <2.62> +- <2.64> +- <2.66> +- <2.68> +- <2.70> +- <2.72> +- <2.74> +- <2.76> +- <2.78> +- <2.80> +- <2.82> +- <2.84> +- <2.86> +- <2.88> +- <2.90> +- <2.92> +- <2.94> +- <2.96> +- <2.98> +- <3.00> +- <3.02> +- <3.04> +- <3.06> +- 
<3.08> +- <3.10> +- <3.12> +- <3.14> +- <3.16> +- <3.18> +- <3.20> +- <3.22> +- <3.24> +- <3.26> +- <3.28> +- <3.30> +- <3.32> +- <3.34> +- <3.36> +- <3.38> +- <3.40> +- <3.42> +- <3.44> +- <3.46> +- <3.48> +- <3.50> +- <3.52> +- <3.54> +- <3.56> +- <3.58> +- <3.60> +- <3.62> +- <3.64> +- <3.66> +- <3.68> +- <3.70> +- <3.72> +- <3.74> +- <3.76> +- <3.78> +- <3.80> +- <3.82> +- <3.84> +- <3.86> +- <3.88> +- <3.90> +- <3.92> +- <3.94> +- <3.96> +- <3.98> +- <4.00> +- <4.02> +- <4.04> +- <4.06> +- <4.08> +- <4.10> +- <4.12> +- <4.14> +- <4.16> +- <4.18> +- <4.20> +- <4.22> +- <4.24> +- <4.26> +- <4.28> +- <4.30> +- <4.32> +- <4.34> +- <4.36> +- <4.38> +- <4.40> +- <4.42> +- <4.44> +- <4.46> +- <4.48> +- <4.50> +- <4.52> +- <4.54> +- <4.56> +- <4.58> +- <4.60> +- <4.62> +- <4.64> +- <4.66> +- <4.68> +- <4.70> +- <4.72> +- <4.74> +- <4.76> +- <4.78> +- <4.80> +- <4.82> +- <4.84> +- <4.86> +- <4.88> +- <4.90> +- <4.92> +- <4.94> +- <4.96> +- <4.98> +- <5.00> +- <5.02> +- <5.04> +- <5.06> +- <5.08> +- <5.10> +- <5.12> +- <5.14> +- <5.16> +- <5.18> +- <5.20> +- <5.22> +- <5.24> +- <5.26> +- <5.28> +- <5.30> +- <5.32> +- <5.34> +- <5.36> +- <5.38> +- <5.40> +- <5.42> +- <5.44> +- <5.46> +- <5.48> +- <5.50> +- <5.52> +- <5.54> +- <5.56> +- <5.58> +- <5.60> +- <5.62> +- <5.64> +- <5.66> +- <5.68> +- <5.70> +- <5.72> +- <5.74> +- <5.76> +- <5.78> +- <5.80> +- <5.82> +- <5.84> +- <5.86> +- <5.88> +- <5.90> +- <5.92> +- <5.94> +- <5.96> +- <5.98> +- <6.00> +- <6.02> +- <6.04> +- <6.06> +- <6.08> +- <6.10> +- <6.12> +- <6.14> +- <6.16> +- <6.18> +- <6.20> +- <6.22> +- <6.24> +- <6.26> +- <6.28> +- <6.30> +- <6.32> +- <6.34> +- <6.36> +- <6.38> +- <6.40> +- <6.42> +- <6.44> +- <6.46> +- <6.48> +- <6.50> +- <6.52> +- <6.54> +- <6.56> +- <6.58> +- <6.60> +- <6.62> +- <6.64> +- <6.66> +- <6.68> +- <6.70> +- <6.72> +- <6.74> +- <6.76> +- <6.78> +- <6.80> +- <6.82> +- <6.84> +- <6.86> +- <6.88> +- <6.90> +- <6.92> +- <6.94> +- <6.96> +- <6.98> +- <7.00> +- <7.02> +- <7.04> +- <7.06> +- 
<7.08> +- <7.10> +- <7.12> +- <7.14> +- <7.16> +- <7.18> +- <7.20> +- <7.22> +- <7.24> +- <7.26> +- <7.28> +- <7.30> +- <7.32> +- <7.34> +- <7.36> +- <7.38> +- <7.40> +- <7.42> +- <7.44> +- <7.46> +- <7.48> +- <7.50> +- <7.52> +- <7.54> +- <7.56> +- <7.58> +- <7.60> +- <7.62> +- <7.64> +- <7.66> +- <7.68> +- <7.70> +- <7.72> +- <7.74> +- <7.76> +- <7.78> +- <7.80> +- <7.82> +- <7.84> +- <7.86> +- <7.88> +- <7.90> +- <7.92> +- <7.94> +- <7.96> +- <7.98> +- <8.00> +- <8.02> +- <8.04> +- <8.06> +- <8.08> +- <8.10> +- <8.12> +- <8.14> +- <8.16> +- <8.18> +- <8.20> +- <8.22> +- <8.24> +- <8.26> +- <8.28> +- <8.30> +- <8.32> +- <8.34> +- <8.36> +- <8.38> +- <8.40> +- <8.42> +- <8.44> +- <8.46> +- <8.48> +- <8.50> +- <8.52> +- <8.54> +- <8.56> +- <8.58> +- <8.60> +- <8.62> +- <8.64> +- <8.66> +- <8.68> +- <8.70> +- <8.72> +- <8.74> +- <8.76> +- <8.78> +- <8.80> +- <8.82> +- <8.84> +- <8.86> +- <8.88> +- <8.90> +- <8.92> +- <8.94> +- <8.96> +- <8.98> +- <9.00> +- <9.02> +- <9.04> +- <9.06> +- <9.08> +- <9.10> +- <9.12> +- <9.14> +- <9.16> +- <9.18> +- <9.20> +- <9.22> +- <9.24> +- <9.26> +- <9.28> +- <9.30> +- <9.32> +- <9.34> +- <9.36> +- <9.38> +- <9.40> +- <9.42> +- <9.44> +- <9.46> +- <9.48> +- <9.50> +- <9.52> +- <9.54> +- <9.56> +- <9.58> +- <9.60> +- <9.62> +- <9.64> +- <9.66> +- <9.68> +- <9.70> +- <9.72> +- <9.74> +- <9.76> +- <9.78> +- <9.80> +- <9.82> +- <9.84> +- <9.86> +- <9.88> +- <9.90> +- <9.92> +- <9.94> +- <9.96> +- <9.98> +- <10.00> +- <10.02> +- <10.04> +- <10.06> +- <10.08> +- <10.10> +- <10.12> +- <10.14> +- <10.16> +- <10.18> +- <10.20> +- <10.22> +- <10.24> +- <10.26> +- <10.28> +- <10.30> +- <10.32> +- <10.34> +- <10.36> +- <10.38> +- <10.40> +- <10.42> +- <10.44> +- <10.46> +- <10.48> +- <10.50> +- <10.52> +- <10.54> +- <10.56> +- <10.58> +- <10.60> +- <10.62> +- <10.64> +- <10.66> +- <10.68> +- <10.70> +- <10.72> +- <10.74> +- <10.76> +- <10.78> +- <10.80> +- <10.82> +- <10.84> +- <10.86> +- <10.88> +- <10.90> +- <10.92> +- <10.94> +- <10.96> +- 
<10.98> +- <11.00> +- <11.02> +- <11.04> +- <11.06> +- <11.08> +- <11.10> +- <11.12> +- <11.14> +- <11.16> +- <11.18> +- <11.20> +- <11.22> +- <11.24> +- <11.26> +- <11.28> +- <11.30> +- <11.32> +- <11.34> +- <11.36> +- <11.38> +- <11.40> +- <11.42> +- <11.44> +- <11.46> +- <11.48> +- <11.50> +- <11.52> +- <11.54> +- <11.56> +- <11.58> +- <11.60> +- <11.62> +- <11.64> +- <11.66> +- <11.68> +- <11.70> +- <11.72> +- <11.74> +- <11.76> +- <11.78> +- <11.80> +- <11.82> +- <11.84> +- <11.86> +- <11.88> +- <11.90> +- <11.92> +- <11.94> +- <11.96> +- <11.98> +- <12.00> +- <12.02> +- <12.04> +- <12.06> +- <12.08> +- <12.10> +- <12.12> +- <12.14> +- <12.16> +- <12.18> +- <12.20> +- <12.22> +- <12.24> +- <12.26> +- <12.28> +- <12.30> +- <12.32> +- <12.34> +- <12.36> +- <12.38> +- <12.40> +- <12.42> +- <12.44> +- <12.46> +- <12.48> +- <12.50> +- <12.52> +- <12.54> +- <12.56> +- <12.58> +- <12.60> +- <12.62> +- <12.64> +- <12.66> +- <12.68> +- <12.70> +- <12.72> +- <12.74> +- <12.76> +- <12.78> +- <12.80> +- <12.82> +- <12.84> +- <12.86> +- <12.88> +- <12.90> +- <12.92> +- <12.94> +- <12.96> +- <12.98> +- <13.00> +- <13.02> +- <13.04> +- <13.06> +- <13.08> +- <13.10> +- <13.12> +- <13.14> +- <13.16> +- <13.18> +- <13.20> +- <13.22> +- <13.24> +- <13.26> +- <13.28> +- <13.30> +- <13.32> +- <13.34> +- <13.36> +- <13.38> +- <13.40> +- <13.42> +- <13.44> +- <13.46> +- <13.48> +- <13.50> +- <13.52> +- <13.54> +- <13.56> +- <13.58> +- <13.60> +- <13.62> +- <13.64> +- <13.66> +- <13.68> +- <13.70> +- <13.72> +- <13.74> +- <13.76> +- <13.78> +- <13.80> +- <13.82> +- <13.84> +- <13.86> +- <13.88> +- <13.90> +- <13.92> +- <13.94> +- <13.96> +- <13.98> +- <14.00> +- <14.02> +- <14.04> +- <14.06> +- <14.08> +- <14.10> +- <14.12> +- <14.14> +- <14.16> +- <14.18> +- <14.20> +- <14.22> +- <14.24> +- <14.26> +- <14.28> +- <14.30> +- <14.32> +- <14.34> +- <14.36> +- <14.38> +- <14.40> +- <14.42> +- <14.44> +- <14.46> +- <14.48> +- <14.50> +- <14.52> +- <14.54> +- <14.56> +- <14.58> +- <14.60> 
+- <14.62> +- <14.64> +- <14.66> +- <14.68> +- <14.70> +- <14.72> +- <14.74> +- <14.76> +- <14.78> +- <14.80> +- <14.82> +- <14.84> +- <14.86> +- <14.88> +- <14.90> +- <14.92> +- <14.94> +- <14.96> +- <14.98> +- <15.00> +- <15.02> +- <15.04> +- <15.06> +- <15.08> +- <15.10> +- <15.12> +- <15.14> +- <15.16> +- <15.18> +- <15.20> +- <15.22> +- <15.24> +- <15.26> +- <15.28> +- <15.30> +- <15.32> +- <15.34> +- <15.36> +- <15.38> +- <15.40> +- <15.42> +- <15.44> +- <15.46> +- <15.48> +- <15.50> +- <15.52> +- <15.54> +- <15.56> +- <15.58> +- <15.60> +- <15.62> +- <15.64> +- <15.66> +- <15.68> +- <15.70> +- <15.72> +- <15.74> +- <15.76> +- <15.78> +- <15.80> +- <15.82> +- <15.84> +- <15.86> +- <15.88> +- <15.90> +- <15.92> +- <15.94> +- <15.96> +- <15.98> +- <16.00> +- <16.02> +- <16.04> +- <16.06> +- <16.08> +- <16.10> +- <16.12> +- <16.14> +- <16.16> +- <16.18> +- <16.20> +- <16.22> +- <16.24> +- <16.26> +- <16.28> +- <16.30> +- <16.32> +- <16.34> +- <16.36> +- <16.38> +- <16.40> +- <16.42> +- <16.44> +- <16.46> +- <16.48> +- <16.50> +- <16.52> +- <16.54> +- <16.56> +- <16.58> +- <16.60> +- <16.62> +- <16.64> +- <16.66> +- <16.68> +- <16.70> +- <16.72> +- <16.74> +- <16.76> +- <16.78> +- <16.80> +- <16.82> +- <16.84> +- <16.86> +- <16.88> +- <16.90> +- <16.92> +- <16.94> +- <16.96> +- <16.98> +- <17.00> +- <17.02> +- <17.04> +- <17.06> +- <17.08> +- <17.10> +- <17.12> +- <17.14> +- <17.16> +- <17.18> +- <17.20> +- <17.22> +- <17.24> +- <17.26> +- <17.28> +- <17.30> +- <17.32> +- <17.34> +- <17.36> +- <17.38> +- <17.40> +- <17.42> +- <17.44> +- <17.46> +- <17.48> +- <17.50> +- <17.52> +- <17.54> +- <17.56> +- <17.58> +- <17.60> +- <17.62> +- <17.64> +- <17.66> +- <17.68> +- <17.70> +- <17.72> +- <17.74> +- <17.76> +- <17.78> +- <17.80> +- <17.82> +- <17.84> +- <17.86> +- <17.88> +- <17.90> +- <17.92> +- <17.94> +- <17.96> +- <17.98> +- <18.00> +- <18.02> +- <18.04> +- <18.06> +- <18.08> +- <18.10> +- <18.12> +- <18.14> +- <18.16> +- <18.18> +- <18.20> +- <18.22> +- 
<18.24> +- <18.26> +- <18.28> +- <18.30> +- <18.32> +- <18.34> +- <18.36> +- <18.38> +- <18.40> +- <18.42> +- <18.44> +- <18.46> +- <18.48> +- <18.50> +- <18.52> +- <18.54> +- <18.56> +- <18.58> +- <18.60> +- <18.62> +- <18.64> +- <18.66> +- <18.68> +- <18.70> +- <18.72> +- <18.74> +- <18.76> +- <18.78> +- <18.80> +- <18.82> +- <18.84> +- <18.86> +- <18.88> +- <18.90> +- <18.92> +- <18.94> +- <18.96> +- <18.98> +- <19.00> +- <19.02> +- <19.04> +- <19.06> +- <19.08> +- <19.10> +- <19.12> +- <19.14> +- <19.16> +- <19.18> +- <19.20> +- <19.22> +- <19.24> +- <19.26> +- <19.28> +- <19.30> +- <19.32> +- <19.34> +- <19.36> +- <19.38> +- <19.40> +- <19.42> +- <19.44> +- <19.46> +- <19.48> +- <19.50> +- <19.52> +- <19.54> +- <19.56> +- <19.58> +- <19.60> +- <19.62> +- <19.64> +- <19.66> +- <19.68> +- <19.70> +- <19.72> +- <19.74> +- <19.76> +- <19.78> +- <19.80> +- <19.82> +- <19.84> +- <19.86> +- <19.88> +- <19.90> +- <19.92> +- <19.94> +- <19.96> +- <19.98> +- <20.00> +- <20.02> +- <20.04> +- <20.06> +- <20.08> +- <20.10> +- <20.12> +- <20.14> +- <20.16> +- <20.18> +- <20.20> +- <20.22> +- <20.24> +- <20.26> +- <20.28> +- <20.30> +- <20.32> +- <20.34> +- <20.36> +- <20.38> +- <20.40> +- <20.42> +- <20.44> +- <20.46> +- <20.48> +- <20.50> +- <20.52> +- <20.54> +- <20.56> +- <20.58> +- <20.60> +- <20.62> +- <20.64> +- <20.66> +- <20.68> +- <20.70> +- <20.72> +- <20.74> +- <20.76> +- <20.78> +- <20.80> +- <20.82> +- <20.84> +- <20.86> +- <20.88> +- <20.90> +- <20.92> +- <20.94> +- <20.96> +- <20.98> +- <21.00> +- <21.02> +- <21.04> +- <21.06> +- <21.08> +- <21.10> +- <21.12> +- <21.14> +- <21.16> +- <21.18> +- <21.20> +- <21.22> +- <21.24> +- <21.26> +- <21.28> +- <21.30> +- <21.32> +- <21.34> +- <21.36> +- <21.38> +- <21.40> +- <21.42> +- <21.44> +- <21.46> +- <21.48> +- <21.50> +- <21.52> +- <21.54> +- <21.56> +- <21.58> +- <21.60> +- <21.62> +- <21.64> +- <21.66> +- <21.68> +- <21.70> +- <21.72> +- <21.74> +- <21.76> +- <21.78> +- <21.80> +- <21.82> +- <21.84> +- <21.86> 
+- <21.88> +- <21.90> +- <21.92> +- <21.94> +- <21.96> +- <21.98> +- <22.00> +- <22.02> +- <22.04> +- <22.06> +- <22.08> +- <22.10> +- <22.12> +- <22.14> +- <22.16> +- <22.18> +- <22.20> +- <22.22> +- <22.24> +- <22.26> +- <22.28> +- <22.30> +- <22.32> +- <22.34> +- <22.36> +- <22.38> +- <22.40> +- <22.42> +- <22.44> +- <22.46> +- <22.48> +- <22.50> +- <22.52> +- <22.54> +- <22.56> +- <22.58> +- <22.60> +- <22.62> +- <22.64> +- <22.66> +- <22.68> +- <22.70> +- <22.72> +- <22.74> +- <22.76> +- <22.78> +- <22.80> +- <22.82> +- <22.84> +- <22.86> +- <22.88> +- <22.90> +- <22.92> +- <22.94> +- <22.96> +- <22.98> +- <23.00> +- <23.02> +- <23.04> +- <23.06> +- <23.08> +- <23.10> +- <23.12> +- <23.14> +- <23.16> +- <23.18> +- <23.20> +- <23.22> +- <23.24> +- <23.26> +- <23.28> +- <23.30> +- <23.32> +- <23.34> +- <23.36> +- <23.38> +- <23.40> +- <23.42> +- <23.44> +- <23.46> +- <23.48> +- <23.50> +- <23.52> +- <23.54> +- <23.56> +- <23.58> +- <23.60> +- <23.62> +- <23.64> +- <23.66> +- <23.68> +- <23.70> +- <23.72> +- <23.74> +- <23.76> +- <23.78> +- <23.80> +- <23.82> +- <23.84> +- <23.86> +- <23.88> +- <23.90> +- <23.92> +- <23.94> +- <23.96> +- <23.98> +- <24.00> +- <24.02> +- <24.04> +- <24.06> +- <24.08> +- <24.10> +- <24.12> +- <24.14> +- <24.16> +- <24.18> +- <24.20> +- <24.22> +- <24.24> +- <24.26> +- <24.28> +- <24.30> +- <24.32> +- <24.34> +- <24.36> +- <24.38> +- <24.40> +- <24.42> +- <24.44> +- <24.46> +- <24.48> +- <24.50> +- <24.52> +- <24.54> +- <24.56> +- <24.58> +- <24.60> +- <24.62> +- <24.64> +- <24.66> +- <24.68> +- <24.70> +- <24.72> +- <24.74> +- <24.76> +- <24.78> +- <24.80> +- <24.82> +- <24.84> +- <24.86> +- <24.88> +- <24.90> +- <24.92> +- <24.94> +- <24.96> +- <24.98> +- <25.00> +- <25.02> +- <25.04> +- <25.06> +- <25.08> +- <25.10> +- <25.12> +- <25.14> +- <25.16> +- <25.18> +- <25.20> +- <25.22> +- <25.24> +- <25.26> +- <25.28> +- <25.30> +- <25.32> +- <25.34> +- <25.36> +- <25.38> +- <25.40> +- <25.42> +- <25.44> +- <25.46> +- <25.48> +- 
<25.50> +- <25.52> +- <25.54> +- <25.56> +- <25.58> +- <25.60> +- <25.62> +- <25.64> +- <25.66> +- <25.68> +- <25.70> +- <25.72> +- <25.74> +- <25.76> +- <25.78> +- <25.80> +- <25.82> +- <25.84> +- <25.86> +- <25.88> +- <25.90> +- <25.92> +- <25.94> +- <25.96> +- <25.98> +- <26.00> +- <26.02> +- <26.04> +- <26.06> +- <26.08> +- <26.10> +- <26.12> +- <26.14> +- <26.16> +- <26.18> +- <26.20> +- <26.22> +- <26.24> +- <26.26> +- <26.28> +- <26.30> +- <26.32> +- <26.34> +- <26.36> +- <26.38> +- <26.40> +- <26.42> +- <26.44> +- <26.46> +- <26.48> +- <26.50> +- <26.52> +- <26.54> +- <26.56> +- <26.58> +- <26.60> +- <26.62> +- <26.64> +- <26.66> +- <26.68> +- <26.70> +- <26.72> +- <26.74> +- <26.76> +- <26.78> +- <26.80> +- <26.82> +- <26.84> +- <26.86> +- <26.88> +- <26.90> +- <26.92> +- <26.94> +- <26.96> +- <26.98> +- <27.00> +- <27.02> +- <27.04> +- <27.06> +- <27.08> +- <27.10> +- <27.12> +- <27.14> +- <27.16> +- <27.18> +- <27.20> +- <27.22> +- <27.24> +- <27.26> +- <27.28> +- <27.30> +- <27.32> +- <27.34> +- <27.36> +- <27.38> +- <27.40> +- <27.42> +- <27.44> +- <27.46> +- <27.48> +- <27.50> +- <27.52> +- <27.54> +- <27.56> +- <27.58> +- <27.60> +- <27.62> +- <27.64> +- <27.66> +- <27.68> +- <27.70> +- <27.72> +- <27.74> +- <27.76> +- <27.78> +- <27.80> +- <27.82> +- <27.84> +- <27.86> +- <27.88> +- <27.90> +- <27.92> +- <27.94> +- <27.96> +- <27.98> +- <28.00> +- <28.02> +- <28.04> +- <28.06> +- <28.08> +- <28.10> +- <28.12> +- <28.14> +- <28.16> +- <28.18> +- <28.20> +- <28.22> +- <28.24> +- <28.26> +- <28.28> +- <28.30> +- <28.32> +- <28.34> +- <28.36> +- <28.38> +- <28.40> +- <28.42> +- <28.44> +- <28.46> +- <28.48> +- <28.50> +- <28.52> +- <28.54> +- <28.56> +- <28.58> +- <28.60> +- <28.62> +- <28.64> +- <28.66> +- <28.68> +- <28.70> +- <28.72> +- <28.74> +- <28.76> +- <28.78> +- <28.80> +- <28.82> +- <28.84> +- <28.86> +- <28.88> +- <28.90> +- <28.92> +- <28.94> +- <28.96> +- <28.98> +- <29.00> +- <29.02> +- <29.04> +- <29.06> +- <29.08> +- <29.10> +- <29.12> 
+- <29.14> +- <29.16> +- <29.18> +- <29.20> +- <29.22> +- <29.24> +- <29.26> +- <29.28> +- <29.30> +- <29.32> +- <29.34> +- <29.36> +- <29.38> +- <29.40> +- <29.42> +- <29.44> +- <29.46> +- <29.48> +- <29.50> +- <29.52> +- <29.54> +- <29.56> +- <29.58> +- <29.60> +- <29.62> +- <29.64> +- <29.66> +- <29.68> +- <29.70> +- <29.72> +- <29.74> +- <29.76> +- <29.78> +- <29.80> +- <29.82> +- <29.84> +- <29.86> +- <29.88> +- <29.90> +- <29.92> +- <29.94> +- <29.96> +- <29.98> +- <30.00> +- ▁ +- ',' +- ▁the +- . +- s +- ▁and +- ▁of +- ▁to +- ▁a +- 。 +- ▁in +- '''' +- ▁that +- ▁i +- ▁it +- ▁was +- en +- e +- t +- ▁you +- ▁he +- ▁is +- 的 +- '-' +- ▁for +- ▁de +- ▁with +- ▁be +- n +- d +- ▁as +- ▁his +- ▁we +- の +- ▁on +- 、 +- ▁die +- ▁had +- a +- ▁but +- ▁so +- '?' +- o +- ▁not +- ▁at +- ▁have +- ▁this +- ▁und +- er +- ▁her +- i +- ▁they +- m +- ed +- re +- ▁she +- ▁an +- ▁by +- ▁der +- ing +- ▁all +- ▁are +- ▁la +- が +- を +- ▁from +- ▁me +- ▁which +- ▁my +- es +- ▁one +- は +- ▁no +- ▁there +- 了 +- ▁him +- ▁or +- ▁were +- に +- y +- ▁do +- ▁what +- r +- ▁zu +- ▁our +- u +- ▁if +- ly +- ▁ist +- ▁would +- ▁when +- ▁their +- ▁will +- ▁said +- で +- ▁es +- ▁like +- ▁das +- ▁Sie +- ▁them +- 在 +- ▁can +- ▁who +- ▁out +- ▁know +- te +- ▁been +- ▁ich +- ▁about +- ▁up +- ▁I +- '!' 
+- ▁more +- ▁man +- ve +- 我 +- ▁que +- ▁un +- 是 +- ▁sie +- 和 +- ▁then +- ▁your +- ▁ein +- ▁some +- ▁den +- а +- ▁now +- 你 +- ▁von +- ▁nicht +- ▁very +- と +- ▁has +- ▁into +- in +- ▁time +- ▁в +- ar +- ▁just +- ▁не +- ▁um +- k +- ▁could +- ▁dass +- ▁Und +- 人 +- ▁think +- ▁auf +- al +- de +- ▁и +- ▁war +- ▁eine +- ▁over +- ▁mit +- ▁well +- も +- 他 +- ▁see +- ▁also +- ▁wir +- ▁other +- ▁des +- le +- ▁how +- ll +- ▁than +- 不 +- 啊 +- ▁these +- ▁little +- ▁sich +- ▁" +- ten +- c +- an +- ▁us +- 上 +- ▁l +- ▁two +- ▁any +- 有 +- ▁don +- ▁go +- ▁did +- ▁people +- ▁only +- е +- na +- ▁good +- l +- st +- ▁where +- ▁на +- ▁el +- ▁se +- z +- to +- 中 +- ▁first +- g +- ta +- ▁na +- ▁di +- ▁als +- is +- ▁come +- ▁Ich +- ▁much +- ▁für +- ne +- ▁с +- 我们 +- 大 +- ▁get +- ▁here +- ▁down +- у +- ▁du +- ▁le +- ▁что +- から +- ▁way +- h +- ▁y +- ▁wie +- ▁should +- ▁before +- ▁am +- ▁made +- ▁those +- ▁after +- 一 +- 'on' +- ▁upon +- ▁because +- ▁back +- ▁right +- ▁haben +- se +- ▁great +- 他们 +- ▁say +- ▁going +- です +- 这个 +- ▁dem +- и +- 就 +- f +- ▁по +- ▁make +- ra +- 来 +- ▁The +- م +- ▁er +- ▁its +- ▁such +- ▁ver +- ▁may +- w +- ge +- ▁even +- la +- ▁men +- 说 +- ':' +- ▁new +- ▁im +- 地 +- é +- da +- ch +- ▁through +- 呢 +- ▁long +- な +- me +- ▁never +- ▁most +- ▁Es +- as +- il +- p +- ▁A +- ▁must +- '"' +- ▁sind +- ▁day +- し +- 一个 +- ▁really +- ▁per +- b +- ▁came +- ▁con +- '1' +- 这 +- man +- ▁And +- ▁life +- ▁many +- м +- ▁old +- est +- я +- j +- ▁я +- ▁yeah +- 'no' +- ro +- ▁again +- т +- ▁things +- ▁La +- ▁hat +- be +- ma +- ▁mu +- ▁own +- 她 +- den +- ▁take +- ci +- ▁et +- ▁aus +- ▁being +- ▁je +- ga +- os +- 它 +- ni +- we +- ▁mr +- ▁still +- ir +- ▁我 +- '2' +- ▁last +- か +- 小 +- ▁too +- ▁les +- ▁might +- 会 +- ▁work +- us +- ▁у +- ce +- や +- ▁every +- un +- ▁We +- 子 +- ▁ku +- ▁al +- 去 +- ▁за +- ▁So +- ы +- at +- ▁want +- る +- ی +- ▁Die +- 到 +- it +- ▁went +- ▁look +- ▁über +- х +- ▁In +- ti +- ▁years +- 对 +- ’ +- 吗 +- ▁это +- ▁same +- ▁three +- ▁something +- ▁da +- do +- li +- ▁werden +- 
▁let +- ▁thought +- ▁himself +- ▁ne +- й +- ▁ah +- 被 +- ▁van +- って +- ▁got +- ▁ge +- д +- ▁world +- ね +- い +- ▁wenn +- va +- 好 +- ▁einen +- 要 +- ▁thing +- ▁while +- ▁à +- ▁part +- 都 +- ه +- с +- 着 +- ▁year +- el +- ▁del +- ▁oh +- ▁away +- ▁hand +- ▁Er +- em +- et +- ho +- 下 +- た +- ▁Das +- ▁auch +- ka +- ▁ya +- ter +- ▁kind +- ▁without +- ▁place +- ж +- ▁put +- 出 +- lo +- ba +- ment +- ▁oder +- ▁under +- 没有 +- した +- л +- ▁вы +- ▁una +- ▁off +- th +- ▁то +- ri +- 里 +- ▁yet +- ze +- н +- する +- ▁diese +- ▁another +- ers +- ▁why +- je +- ▁found +- 从 +- 时 +- ko +- 就是 +- sa +- ng +- ▁S +- ▁shall +- ▁aber +- ▁sein +- or +- ▁а +- 点 +- '3' +- ur +- ка +- ▁tell +- ▁god +- お +- ▁و +- ▁Aber +- ke +- ness +- 也 +- ▁far +- able +- ▁give +- si +- して +- ▁though +- ▁nothing +- mo +- ▁eyes +- 手 +- ▁once +- 家 +- x +- ver +- ▁always +- ▁het +- 日 +- ▁saw +- ▁house +- men +- 用 +- di +- ▁ha +- この +- ja +- go +- ▁wird +- ▁ever +- ▁een +- ▁face +- о +- ) +- ▁vor +- ▁你 +- 年 +- ▁love +- am +- 想 +- ung +- ent +- ▁uns +- ▁home +- he +- на +- ▁habe +- ation +- ▁head +- ▁nach +- ли +- ▁nur +- mi +- ▁few +- zi +- ▁young +- но +- ن +- て +- ▁mean +- ▁mich +- ki +- ▁better +- ▁each +- ▁einem +- ▁find +- ha +- ▁si +- ▁business +- ш +- ت +- では +- ▁left +- ▁both +- ton +- ▁It +- ▁v +- der +- v +- ▁einer +- 那 +- 为 +- wa +- ▁night +- · +- 过 +- ▁took +- ▁( +- ▁können +- ▁mind +- ▁— +- ya +- ▁father +- ▁mir +- ▁moment +- ▁done +- 得 +- ▁va +- ca +- د +- 水 +- ▁su +- 」 +- ▁against +- ▁need +- ry +- 吧 +- 的人 +- ▁end +- ▁yes +- sch +- ▁lot +- ity +- という +- um +- ▁course +- co +- ▁dat +- ▁half +- ▁between +- 高 +- ▁Wir +- ▁من +- ant +- り +- ▁next +- ste +- 当 +- ▁noch +- ▁pro +- ▁He +- 多 +- 做 +- ▁E +- gen +- 本 +- 这些 +- 事 +- でも +- ▁می +- ▁door +- к +- ▁called +- ted +- son +- ▁looked +- za +- ▁ja +- ul +- 前 +- tu +- ▁ko +- ▁wurde +- ye +- ス +- ، +- 什么 +- ▁por +- ▁kann +- ▁los +- ▁asked +- ش +- ▁seen +- ▁как +- 回 +- ла +- 「 +- ▁дробь +- ▁улица +- 心 +- ▁side +- ▁told +- ▁does +- ら +- '4' +- 者 +- lu +- ▁son +- ▁having 
+- 生 +- ▁water +- 分 +- ol +- 三 +- ▁sehr +- ▁quite +- ▁hatte +- 看 +- ▁El +- ▁heard +- ▁whole +- ▁heart +- ▁believe +- '".' +- ▁te +- ▁met +- ч +- 打 +- 与 +- ▁name +- ▁uh +- 的时候 +- ▁ni +- 而 +- ▁mother +- ▁در +- ▁ال +- vo +- 把 +- ▁bei +- land +- 能 +- ▁ob +- ▁knew +- ▁о +- ▁op +- ▁ma +- ▁different +- ▁around +- ▁best +- ▁به +- ▁call +- 可以 +- ▁mi +- ▁enough +- ▁il +- ▁second +- ▁para +- く +- 行 +- 自己 +- ▁par +- 性 +- ie +- ▁point +- ▁seemed +- ю +- ます +- 后 +- ▁ba +- ▁set +- ▁four +- 所 +- ました +- ة +- ▁pas +- ▁mrs +- so +- には +- ▁woman +- ia +- ven +- ▁room +- ▁да +- 山 +- ting +- その +- ・ +- bo +- S +- ▁five +- ▁che +- ion +- 이 +- ▁high +- 走 +- ▁所以 +- ▁sure +- ▁但是 +- ▁durch +- 还 +- '5' +- ▁hundred +- ▁country +- ▁light +- ▁sir +- та +- ▁use +- ▁hier +- ▁anything +- ▁sa +- г +- ▁к +- ▁actually +- 现在 +- ad +- by +- ▁bir +- ku +- ▁он +- ▁quarter +- ▁wa +- ▁po +- ó +- ▁при +- ▁almost +- ▁про +- ▁days +- ▁от +- ▁他 +- 天 +- ken +- ▁help +- 最 +- ▁care +- ▁sort +- ر +- 您 +- ▁dieser +- ▁Der +- po +- 特 +- id +- 新 +- ▁L +- ▁ab +- ですね +- ▁gibt +- ▁soon +- ler +- 国 +- ▁com +- ▁з +- ▁Be +- 我的 +- ▁white +- ▁alle +- ▁dann +- du +- ling +- ▁small +- 力 +- ي +- less +- ▁together +- ▁fact +- im +- ▁since +- ▁money +- ak +- ▁мы +- 很 +- à +- 个 +- 你的 +- ▁qui +- ▁doing +- さん +- hi +- 更 +- ▁until +- 给 +- ine +- 斯 +- ▁talk +- ▁question +- ▁nor +- 老 +- き +- ty +- ▁twenty +- ▁didn +- ▁used +- ik +- vi +- ▁bit +- ▁hard +- ▁miss +- zu +- ▁big +- ▁full +- ▁Le +- ▁Re +- ▁yn +- ▁De +- ist +- よ +- 非常 +- au +- ▁gave +- 不是 +- ▁morning +- さ +- з +- ▁mehr +- ов +- 将 +- ку +- ▁keep +- pa +- ut +- 可能 +- ▁lord +- ▁rest +- ▁number +- ▁An +- que +- ▁real +- 가 +- ▁words +- ▁began +- and +- ки +- 呀 +- ▁ik +- ▁immer +- ▁however +- ▁hands +- ▁open +- ▁king +- 头 +- ▁bin +- ▁ب +- A +- ▁lo +- bi +- ▁looking +- ▁раз +- ▁pe +- led +- ل +- ▁па +- ▁c +- п +- ig +- р +- ▁turned +- ▁feel +- ▁En +- 那个 +- ▁waren +- 可 +- lar +- ic +- ▁і +- ▁felt +- ▁так +- 比 +- ▁zum +- ▁bu +- ها +- 再 +- ▁في +- ▁wirklich +- 像 +- в +- 外 +- ▁etwas +- 开始 
+- 已经 +- 长 +- ちょっと +- ▁poor +- ▁pour +- ▁D +- ▁power +- 或 +- 名 +- ▁word +- ▁among +- gi +- 度 +- ▁myself +- ▁children +- ▁during +- ا +- ▁whom +- ▁ka +- しました +- ▁这 +- ▁lady +- ▁large +- ▁matter +- ▁death +- ▁Ver +- ▁Ja +- ▁vi +- pe +- ▁true +- '6' +- ▁certain +- ate +- ом +- kan +- nt +- ▁state +- ▁pa +- 并 +- ь +- ▁till +- ▁که +- ▁dans +- ▁person +- 月 +- 金 +- ▁present +- ▁general +- les +- ▁tun +- ▁O +- tion +- ă +- 向 +- ▁boy +- 因为 +- ▁live +- ▁case +- ▁est +- ▁brought +- 以 +- ▁black +- 成 +- 又 +- 你们 +- ek +- ▁order +- ▁dis +- oj +- ▁rather +- mu +- だ +- ▁Se +- 太 +- ▁voice +- ▁w +- ▁все +- ▁taken +- zo +- 道 +- ▁Menschen +- 先 +- jo +- ть +- ▁Wenn +- 让 +- ▁perhaps +- ▁given +- ない +- 内 +- ▁hear +- ▁already +- ju +- 所以 +- ся +- ▁une +- dy +- ▁important +- ▁girl +- ▁human +- ▁show +- 发 +- ▁вот +- 这种 +- ▁ihre +- ▁را +- ▁C +- wi +- ▁hij +- á +- ▁ihr +- ▁B +- ▁qu +- ▁ihn +- ▁K +- ▁company +- lan +- ▁meine +- ▁strong +- ren +- ▁thousand +- the +- った +- га +- ▁seine +- す +- ▁air +- ▁coming +- ▁But +- 面 +- ▁friend +- ▁-- +- ben +- ▁within +- ▁round +- 海 +- 方 +- ▁Li +- ▁change +- ва +- ji +- ard +- ▁six +- み +- 二 +- ai +- ▁often +- ▁everything +- ▁sea +- ع +- 没 +- ▁along +- nie +- ▁sent +- ▁Ein +- ▁whether +- 他的 +- '7' +- ▁wissen +- 无 +- う +- 这是 +- とか +- 尔 +- per +- ▁estas +- 但 +- ▁но +- ieren +- ▁This +- ▁car +- ive +- ▁themselves +- ب +- 车 +- 口 +- ▁forward +- 入 +- 学 +- ▁co +- ▁N +- ado +- ▁Na +- 西 +- 何 +- ▁others +- ▁za +- í +- つ +- го +- ▁我们 +- ▁idea +- 开 +- ▁keur +- س +- ▁across +- ▁dear +- ▁stood +- ▁school +- ▁那 +- ▁understand +- ▁sagte +- ▁making +- ▁dan +- ck +- bu +- ▁family +- ▁fire +- ▁nature +- ▁near +- ▁из +- 于 +- ем +- もう +- ▁times +- да +- ▁herself +- ▁public +- ▁niet +- ▁b +- ▁Zeit +- ▁read +- ▁cannot +- ▁pre +- ▁hope +- 花 +- ▁turn +- ▁city +- ル +- ▁این +- cu +- ▁würde +- è +- ▁است +- ▁zijn +- ger +- 死 +- ▁از +- sta +- ны +- ▁viel +- ▁li +- ner +- ▁women +- ▁gone +- ▁sagen +- ster +- ▁non +- ▁sur +- 听 +- ▁然后 +- fe +- ▁ser +- ▁means +- ▁thus +- いい +- ▁tu +- ▁child 
+- ▁least +- ق +- ▁sehen +- ▁T +- ▁nu +- 化 +- ▁pretty +- aba +- ▁ki +- ты +- ▁sense +- ▁says +- ada +- ▁أ +- fa +- 工作 +- ので +- ▁reason +- 这样 +- ▁einfach +- ▁keine +- ▁behind +- 日本 +- ▁この +- ▁wife +- ▁Il +- T +- E +- いた +- che +- ▁voor +- ▁short +- ▁bi +- 一些 +- ▁body +- ▁indeed +- 先生 +- ▁ce +- ▁feet +- wo +- 一样 +- '8' +- ▁M +- 就像 +- ▁wo +- 还是 +- 德 +- نا +- ▁до +- ▁Ma +- ▁sat +- ▁continue +- ま +- ts +- ▁unter +- age +- q +- ▁ask +- 儿 +- ▁U +- ▁close +- ить +- ம் +- ▁several +- 放 +- ▁won +- ▁om +- ية +- ▁whose +- не +- ▁market +- 之 +- ▁red +- ▁possible +- C +- up +- 你知道 +- ▁amb +- イ +- わ +- 部 +- ▁leave +- tes +- など +- 는 +- として +- ▁less +- ile +- ク +- ▁become +- ля +- ▁later +- 重 +- 拉 +- ان +- 吃 +- ▁known +- ▁Un +- ف +- ▁growth +- zen +- ä +- P +- ft +- ▁Ge +- ▁friends +- و +- lich +- ▁either +- ny +- gu +- ▁story +- ▁probably +- 体 +- ous +- ana +- ▁ihm +- ー +- ▁Sch +- ▁able +- ▁thou +- ▁else +- ▁wieder +- io +- 白 +- ц +- 跟 +- 万 +- ▁run +- 光 +- 位 +- ▁diesem +- ▁bad +- б +- ▁alone +- 但是 +- ات +- ▁そして +- ▁gut +- ز +- ▁start +- 法 +- ▁past +- 路 +- ▁itself +- iert +- aj +- ▁în +- š +- ▁Art +- ле +- ▁jetzt +- I +- ran +- і +- ▁therefore +- ア +- ▁free +- ▁sometimes +- ▁passed +- ў +- ▁speak +- mos +- 数 +- 问题 +- yo +- ▁book +- ▁line +- ▁andere +- ▁mal +- chen +- ми +- ▁Bu +- ▁above +- ▁became +- ▁Al +- bar +- 我们的 +- ▁wanted +- des +- ال +- way +- 利 +- era +- ك +- ▁bo +- ▁Je +- ▁cried +- '9' +- ban +- ▁today +- tro +- ▁au +- 今 +- 고 +- ▁getting +- 别 +- ▁okay +- ley +- ▁kaj +- ham +- 马 +- ん +- fer +- sten +- 지 +- ▁saying +- ▁No +- ▁bring +- ▁zwei +- 四 +- ated +- 需要 +- ▁p +- 田 +- ح +- ▁weil +- ▁las +- ▁z +- mer +- gel +- ▁és +- 世界 +- '10' +- ber +- 孩子 +- んだ +- ет +- end +- 都是 +- ▁ta +- ▁그 +- だった +- 王 +- リ +- ▁form +- ▁dead +- ▁hour +- ▁future +- ма +- ▁bis +- ▁machen +- 物 +- پ +- лі +- ▁earth +- ▁zur +- 自 +- nd +- ful +- カ +- ▁là +- ▁government +- ▁Do +- 钱 +- ▁، +- му +- nu +- ▁play +- ▁remember +- ▁land +- ▁bed +- ▁vous +- ere +- ▁plus +- 知道 +- 两 +- 带 +- 真 +- ▁fast +- ish +- ning 
+- 快 +- 身 +- ▁fell +- ▁dark +- ▁held +- 时间 +- rs +- 定 +- им +- ▁As +- ▁started +- ▁lost +- op +- 主 +- ▁hi +- ▁como +- 爱 +- ▁ت +- ▁answer +- ▁Was +- 一下 +- ▁bar +- ▁один +- tor +- します +- gan +- ▁cost +- ная +- ▁fa +- nya +- ang +- ▁сто +- ой +- ▁early +- ari +- min +- ご +- ▁müssen +- su +- ▁ground +- lin +- ▁answered +- ▁ago +- red +- ▁anderen +- ▁lay +- 明 +- ▁Vi +- ▁ў +- ▁Ba +- con +- ▁stand +- 正 +- ▁但 +- 安 +- 知 +- 加 +- ▁под +- 合 +- ▁selbst +- 谁 +- 起 +- han +- ▁они +- ▁em +- ра +- ▁Ha +- ç +- ']' +- tt +- vu +- 五 +- ▁truth +- ▁Ka +- 美 +- ▁mar +- ot +- ド +- ▁мне +- 看到 +- ▁alles +- 流 +- ▁denke +- ▁try +- ac +- iz +- ait +- 人们 +- ker +- cy +- ru +- ▁clear +- ко +- 同 +- ▁Leute +- ▁working +- 克 +- ▁Ne +- ▁front +- ▁towards +- ▁replied +- ト +- port +- ▁taking +- ▁Ihnen +- ▁viele +- 火 +- 然后 +- ▁же +- 怎么 +- her +- え +- র +- ▁further +- 等 +- 那么 +- day +- 的话 +- ин +- 叫 +- ▁Dinge +- ▁fine +- ▁ad +- 市 +- வ +- ▁kon +- ▁ну +- ам +- 只是 +- om +- ▁zurück +- ▁trying +- ." +- tan +- ism +- ▁law +- O +- ▁town +- ▁было +- ▁для +- 感 +- ▁fear +- ff +- ▁Da +- 门 +- lle +- ez +- B +- 全 +- 气 +- これ +- にも +- ▁его +- ini +- ern +- まで +- ire +- 目 +- ▁ты +- 真的 +- ラ +- ; +- ▁thank +- ▁со +- ▁table +- are +- ▁ye +- ؟ +- ▁Y +- 使用 +- 中国 +- ▁есть +- ▁third +- ▁church +- ▁dieses +- ▁expect +- ú +- 作 +- います +- ex +- ▁Du +- ▁art +- 平 +- ▁Sa +- ▁diesen +- ный +- ▁sun +- ▁doubt +- ▁бы +- ▁return +- qui +- ▁system +- ▁pay +- ap +- ▁sin +- ▁era +- ▁[ +- ü +- ance +- のは +- ▁plan +- 도 +- め +- ▁maybe +- ▁soul +- ர் +- コ +- 野 +- ▁dr +- ita +- tre +- ina +- ло +- ▁muss +- ▁الم +- sen +- 通 +- ▁aan +- 请 +- ▁sound +- ring +- ▁거 +- ند +- one +- ▁Wie +- 南 +- ▁beautiful +- ▁seems +- 見 +- ах +- ▁week +- ▁brother +- ▁два +- ▁با +- ▁kept +- sha +- 時 +- elle +- あ +- ary +- gar +- ▁certainly +- ▁continued +- ▁comes +- ▁P +- ▁top +- ▁position +- ча +- 어 +- '20' +- sto +- ▁там +- ит +- ▁gu +- ▁нас +- K +- ▁suddenly +- ▁seven +- ▁thinking +- ▁А +- している +- 에 +- ab +- ▁wish +- ▁talking +- ▁ready +- ▁Mi +- ▁master +- F +- ▁service 
+- 自己的 +- ন +- ▁evening +- しています +- ▁view +- ▁cap +- ▁thy +- 那些 +- ▁nie +- ک +- ▁ما +- sel +- ▁subject +- 通过 +- ▁Per +- '...' +- 应该 +- ▁Ta +- ▁eines +- ▁cold +- ure +- és +- pi +- ▁dé +- 文 +- ▁longer +- ▁letter +- ▁cause +- ▁Ro +- ▁happened +- kin +- 生活 +- ды +- ▁deep +- ▁nous +- ▁если +- dan +- ▁common +- ding +- 立 +- 使 +- ▁happy +- ▁post +- van +- ▁zij +- bel +- でした +- ン +- tar +- ▁дом +- ▁ca +- んで +- ▁low +- ▁arm +- 区 +- ▁gi +- ▁cu +- 动 +- ▁pass +- ти +- ни +- то +- ble +- ▁interest +- ▁dit +- 格 +- ah +- ▁schon +- cia +- ix +- 这么 +- ê +- igen +- ▁ihnen +- 只 +- ▁john +- 米 +- une +- ▁Ihre +- ▁manner +- ▁living +- ies +- ▁To +- 它们 +- 声 +- து +- ama +- ▁natural +- ある +- 送 +- いて +- ▁hold +- ▁thirty +- ▁eight +- ▁「 +- ▁б +- ▁Welt +- mp +- 研究 +- ▁spirit +- fi +- ▁months +- された +- ▁river +- ▁அ +- ين +- ▁unsere +- 出来 +- ▁husband +- ▁watch +- G +- ক +- ▁capital +- ite +- sh +- 店 +- 不会 +- mal +- ris +- 问 +- ▁road +- 意 +- 住 +- ▁rose +- ▁fi +- los +- ▁guess +- val +- 总 +- ▁Ab +- 的是 +- ▁seeing +- ▁support +- 喜欢 +- ня +- ной +- ▁result +- ub +- ▁Ni +- 有一个 +- 我想 +- 原 +- ▁You +- ▁value +- во +- ط +- ▁deal +- ▁meet +- ▁results +- 号 +- ▁wurden +- 長 +- bra +- 今天 +- tic +- 木 +- 不能 +- ▁street +- 次 +- 希望 +- ▁vol +- 情 +- 给我 +- ▁green +- ар +- ▁denn +- ▁social +- ▁hours +- 或者 +- 科 +- ▁returned +- ▁cut +- ил +- ▁experience +- vis +- ian +- ون +- len +- ▁sub +- ungen +- ▁strange +- ud +- 元 +- ▁werde +- 戦 +- кі +- いる +- ая +- 是的 +- ▁whatever +- ▁einige +- ▁sight +- 之前 +- ▁mein +- 公司 +- 半 +- 的事情 +- ▁drei +- 言 +- ▁doesn +- 公 +- ischen +- car +- ▁その +- ▁Aus +- ей +- те +- 也是 +- 是一个 +- ▁să +- ▁fair +- ▁У +- ▁terms +- ▁food +- 気 +- ▁minutes +- ef +- 的东西 +- ▁seiner +- ▁아 +- 能够 +- ль +- خ +- ▁ex +- ons +- sche +- ▁Te +- ▁court +- ▁process +- ナ +- tel +- ▁account +- 发现 +- 该 +- 品 +- 林 +- ▁received +- ▁blood +- ▁меня +- ных +- ence +- ag +- D +- ▁followed +- ▁Ra +- ▁ho +- ▁Mo +- 干 +- ▁pri +- ロ +- 张 +- がある +- ò +- ▁feeling +- ну +- ici +- ▁Co +- 州 +- 觉得 +- ▁particular +- las +- ல் +- マ +- ▁Di +- 
▁makes +- ні +- lie +- ▁はい +- ▁yo +- ▁due +- 他们的 +- ▁move +- ▁یک +- ▁english +- 第 +- line +- ā +- 女 +- ▁أن +- ▁mais +- ▁seem +- 讲 +- 代 +- 八 +- ▁Vor +- ende +- ▁damit +- ما +- 线 +- ▁act +- ▁beyond +- ria +- れ +- nen +- ▁captain +- zer +- ▁single +- ▁hair +- ▁об +- té +- ▁arms +- ▁spoke +- ▁They +- tra +- ▁suppose +- ▁có +- tas +- க் +- ர +- ▁self +- ▁Also +- ▁information +- ▁data +- ▁simple +- وا +- ста +- 之后 +- ша +- ▁wild +- ▁horse +- ige +- ▁R +- 还有 +- 一点 +- M +- king +- ▁tried +- ▁sleep +- č +- 相 +- ▁mine +- 을 +- 石 +- ée +- 社会 +- ndo +- 色 +- _ +- mes +- 量 +- 人が +- 神 +- ▁wrong +- 大家 +- ▁effect +- ▁wind +- ▁history +- ▁sit +- ▁yourself +- ور +- 才 +- 集 +- ▁eye +- 球 +- 理 +- ▁walk +- ▁fifty +- ブ +- ▁states +- 军 +- cer +- ▁foot +- ▁Um +- 们 +- レ +- hu +- cht +- ▁очень +- 台 +- だけ +- じ +- 形 +- ▁inter +- ▁Les +- ▁problem +- gri +- ▁although +- ▁fer +- ▁maar +- こ +- 空 +- ме +- 不要 +- ▁gold +- ische +- N +- 找 +- ▁G +- ▁mo +- fu +- ▁这是 +- ura +- mente +- ally +- ▁F +- 关于 +- 北 +- if +- 很多 +- î +- ▁space +- ست +- ▁когда +- ▁period +- なかった +- ▁ei +- 所有 +- ▁könnte +- ています +- んです +- ▁glad +- こと +- タ +- ▁weiß +- 六 +- ▁example +- ▁двадцать +- ▁stop +- ▁stuff +- 私 +- ▁els +- ron +- ▁reached +- 认为 +- ▁doctor +- ę +- 政府 +- 让我 +- ▁none +- nde +- ano +- ම +- ▁daughter +- ▁send +- ▁force +- のか +- use +- 进 +- tter +- ▁chance +- ras +- ▁attention +- че +- 见 +- ▁W +- 买 +- ▁fall +- ▁pot +- 教 +- ম +- room +- ▁Man +- ş +- について +- ▁она +- ▁blue +- ▁exactly +- ale +- 如果 +- ▁New +- 东西 +- né +- ▁dir +- 時間 +- ▁Ku +- ▁camp +- ▁stay +- wer +- ▁party +- ▁Si +- ▁especially +- ض +- 完全 +- 川 +- wn +- af +- ando +- fen +- ди +- ▁character +- ▁dich +- ▁except +- вер +- 布 +- ▁please +- my +- ок +- ▁Pa +- そう +- ▁Als +- ▁share +- ▁hatten +- ия +- フ +- 事情 +- ▁Leben +- ▁higher +- ▁tôi +- 阿 +- 何か +- ement +- 受 +- になる +- ▁mis +- ▁outside +- ▁following +- ▁hu +- ▁level +- ▁Me +- ные +- ▁geht +- 信 +- ру +- ▁couple +- ▁lui +- ▁necessary +- ok +- ▁Auf +- ele +- 器 +- 给你 +- 卡 +- 味 +- やっぱり +- 起来 +- це +- 话 +- ▁instead +- 
▁và +- ▁opportunity +- ▁carried +- ▁nun +- eg +- nk +- ▁appeared +- ▁toward +- 食 +- 如何 +- ▁gab +- cho +- ▁respect +- 风 +- ati +- ▁south +- ▁Teil +- 美国 +- シ +- ▁added +- ž +- ▁window +- ▁如果 +- nda +- 正在 +- ▁او +- är +- ன் +- ö +- ▁control +- ating +- もの +- ▁england +- ор +- ф +- за +- ▁seinen +- ▁пере +- ▁nine +- па +- ▁questions +- ই +- 成为 +- 越 +- オ +- ▁ought +- 期 +- ▁más +- ▁save +- 九 +- 朝 +- del +- ць +- ▁Mu +- ▁late +- ▁lower +- ▁нь +- 民 +- ira +- 以后 +- ▁lived +- ▁health +- ▁knowledge +- ▁ke +- ▁Lo +- 写 +- ▁што +- ns +- ▁нет +- ▁现在 +- res +- са +- ▁customers +- ▁tra +- он +- 嘛 +- ▁dollars +- ый +- ё +- ▁Mae +- ▁Bo +- ▁gehen +- cha +- ▁wat +- ▁Po +- र +- 歌 +- ▁opened +- ks +- ار +- 取 +- ▁Im +- chi +- ▁miles +- ▁grand +- eth +- let +- また +- 少 +- ▁haar +- ている +- ▁pen +- ▁wall +- ach +- க்க +- ▁è +- 学校 +- 选择 +- ▁comp +- ▁eighteen +- ▁step +- ▁easy +- tur +- 哪 +- heit +- ば +- ▁quickly +- don +- ▁Mal +- mar +- ▁wait +- ging +- 书 +- っていう +- ky +- 回来 +- 员 +- eur +- ба +- ▁Pro +- 城 +- ▁Bi +- ien +- ▁neither +- ▁уже +- ре +- tal +- ▁fellow +- ise +- ▁group +- ▁product +- lla +- ▁Ri +- ام +- 人の +- ▁building +- ▁чтобы +- ide +- ▁因为 +- ▁goes +- ▁north +- ▁chi +- ▁jeung +- ▁Z +- ▁vielleicht +- ▁music +- ு +- 一种 +- ▁main +- ▁три +- una +- ▁paper +- 周 +- ри +- ▁bright +- ▁beginning +- க +- ным +- out +- ▁V +- ры +- tin +- 几 +- ▁hall +- L +- ▁ningal +- ▁wonder +- ▁diwawancara +- ▁forth +- ▁olohok +- ▁office +- ▁christ +- ▁sweet +- ▁möchte +- ミ +- 後 +- ذ +- ▁wollen +- stre +- stand +- ▁ihren +- ities +- H +- ned +- yi +- ▁var +- ▁wäre +- ▁figure +- ▁mhm +- ▁Ki +- 为什么 +- ▁entre +- ▁nearly +- ▁ran +- ▁dipoto +- ன +- ▁wartawan +- ▁avec +- ▁forty +- 任何 +- 制 +- ▁пра +- 十 +- kt +- 在这里 +- ке +- ▁team +- ali +- time +- ▁mon +- ida +- 夜 +- 清 +- bre +- 进行 +- ▁Mann +- lor +- med +- 的一个 +- ▁ganz +- ج +- دا +- 由 +- ▁bien +- 机 +- 本当に +- 拿 +- と思います +- 近 +- ▁Con +- 大学 +- ▁kam +- ğ +- ges +- wy +- ▁rich +- 直 +- ийн +- ▁interesting +- 的な +- ▁Mit +- ду +- ▁There +- ▁local +- ▁konnte +- или +- 
▁sollte +- 急 +- ▁Am +- 波 +- 特别 +- ▁purpose +- 其他 +- 倒 +- mon +- ▁este +- ▁lives +- ல +- ということです +- ▁будет +- ан +- ▁daß +- 的地方 +- ▁Ad +- ту +- ville +- ▁key +- kel +- pan +- ص +- 巴 +- ▁một +- 村 +- 得到 +- 表 +- od +- ert +- ▁heute +- けど +- ল +- ▁Com +- ▁근데 +- ça +- 解 +- ▁game +- 接 +- ▁news +- ▁peace +- ▁weiter +- ய +- ▁pleasure +- sti +- ▁darüber +- for +- ▁situation +- ▁learn +- ▁ou +- ▁ihrer +- 局 +- 笑 +- ▁lie +- ▁안 +- バ +- 是什么 +- tie +- ▁على +- ▁silence +- ▁grow +- R +- 是不是 +- '30' +- ▁sister +- tri +- ▁sei +- 安全 +- ther +- த +- ▁class +- ید +- 音 +- 派 +- 甚至 +- ▁zoo +- ▁pan +- ▁moved +- ▁strength +- ib +- ili +- ▁sta +- ということで +- ▁modern +- ▁success +- ante +- side +- 雪 +- ▁secret +- log +- ▁Va +- ł +- 君 +- 初 +- 哥 +- ▁dog +- ▁follow +- 指 +- ani +- cher +- се +- 船 +- 切 +- 继续 +- ▁board +- 男 +- ▁comme +- ▁ru +- ▁mag +- 提供 +- ▁뭐 +- vel +- 古 +- nes +- U +- ▁och +- ▁davon +- 다 +- 中的 +- 国家 +- 雨 +- 来说 +- ▁french +- ッ +- 早 +- ▁gegen +- ро +- mit +- lichen +- ▁middle +- 两个 +- ▁project +- ▁С +- lang +- ▁uit +- ▁afraid +- どう +- ía +- ▁eat +- ▁expected +- ▁dar +- cal +- への +- ▁bear +- む +- 实际上 +- 管 +- غ +- nce +- 落 +- ▁Я +- よく +- ▁performance +- ▁Arbeit +- っ +- 难 +- ца +- ▁wouldn +- ▁ara +- tte +- ▁army +- こう +- ▁entered +- uri +- 到了 +- mas +- 字 +- ▁spring +- ▁fit +- ▁Ho +- mor +- ize +- 如此 +- ier +- ▁impact +- ▁object +- 七 +- 帮助 +- ▁national +- can +- ▁drive +- rea +- ▁cool +- ▁private +- 片 +- ” +- ▁difficult +- 在一起 +- 咱们 +- 土 +- 曲 +- ▁joy +- ized +- ப் +- ү +- ▁standing +- 其实 +- ▁progress +- ▁trouble +- 低 +- ▁floor +- tres +- ▁pu +- 選手 +- ▁только +- 東京 +- ▁ship +- ▁Nun +- かな +- 哈 +- ▁Su +- ▁break +- чи +- 一直 +- 命 +- form +- ▁mai +- ول +- ▁zusammen +- ئ +- ple +- ▁av +- ▁cho +- エ +- ズ +- ▁provide +- 救 +- pen +- ▁She +- ами +- ▁happen +- ving +- 病 +- ▁din +- 感じ +- 作为 +- zeit +- 活 +- になって +- ▁nice +- ウ +- ▁ways +- ▁american +- ▁everybody +- ▁special +- ▁smile +- же +- ▁major +- 率 +- ▁ju +- sse +- ▁chief +- ▁hem +- 理解 +- 一起 +- およそ +- 处 +- ▁sont +- ত +- ▁distance +- 罗 +- ▁И +- 看看 +- 
чы +- 电 +- ció +- vin +- ▁prince +- ▁dies +- ▁sign +- لا +- ▁fight +- ▁greater +- ▁That +- ен +- ▁mouth +- 朋友 +- ▁Jahren +- dig +- ido +- ▁kein +- ▁consider +- ▁demand +- ව +- 反 +- ▁sitting +- ого +- ▁written +- ▁lang +- ▁oku +- ▁Wa +- ní +- cla +- ▁remain +- ▁couldn +- 李 +- art +- dd +- ▁Eine +- 은 +- 街 +- 场 +- ▁design +- ▁spot +- ▁потому +- house +- cken +- 不知道 +- ▁vom +- すると +- ▁length +- nis +- win +- ▁ба +- 〉 +- ▁job +- ▁пять +- すごい +- ship +- 岁 +- hen +- ▁heaven +- де +- ა +- san +- 必须 +- 河 +- ▁books +- 条 +- ▁final +- of +- ▁grew +- ▁base +- 一次 +- ие +- ▁walked +- ▁isn +- ▁increase +- 官 +- ▁просто +- 奥 +- ▁society +- ▁amount +- ▁On +- 断 +- ero +- cur +- гу +- ▁trees +- ▁program +- ▁これ +- ې +- ▁был +- न +- 有点 +- ى +- ▁ம +- ations +- â +- iza +- ▁faith +- vor +- ▁heavy +- ▁immediately +- ▁battle +- ▁president +- ین +- ▁financial +- ▁What +- ▁〈 +- のが +- ▁dort +- ▁четыре +- ▁running +- ▁begin +- ▁рас +- par +- 間 +- サ +- ▁personal +- へ +- ▁action +- ха +- wan +- ▁cor +- ▁тридцать +- sie +- ▁places +- ▁development +- ▁visit +- ▁во +- ▁train +- ▁eu +- ▁mary +- ▁learned +- ▁không +- ▁write +- ▁cre +- ▁ohne +- ですよね +- ▁Gu +- ст +- 一个人 +- 东 +- 最近 +- 首 +- ется +- ▁darauf +- nas +- ▁hot +- さんが +- ▁died +- 地方 +- 支持 +- ую +- ▁laid +- ▁perfect +- 她的 +- ces +- ries +- ▁queen +- 工 +- 却 +- そして +- ▁Ar +- 决定 +- できる +- ▁einmal +- 서 +- 足 +- ▁command +- ▁safe +- ии +- yu +- される +- ▁slowly +- 连 +- dra +- ә +- ▁price +- ▁Ti +- ▁super +- 玩 +- ı +- ▁Mar +- ий +- ▁quick +- ▁wood +- ▁glaube +- ▁сейчас +- ▁add +- 女性 +- ram +- ndi +- ▁estis +- liche +- ▁build +- 只有 +- 实 +- っている +- ▁sales +- ▁nga +- ▁Zu +- ▁stopped +- ą +- 队 +- dro +- ▁sobre +- キ +- 森 +- ▁嗯 +- ▁finally +- 红 +- 思 +- nia +- ▁various +- ▁conversation +- 差 +- 当然 +- ▁color +- ▁changed +- ▁age +- 推 +- ө +- цы +- ▁married +- 远 +- ▁west +- ▁caught +- mm +- 酒 +- ▁gentleman +- vid +- 深 +- mus +- ▁més +- ▁cette +- lik +- 战 +- ▁hast +- clock +- uch +- 所有的 +- 强 +- ве +- ▁или +- ▁film +- ▁desire +- shi +- mel +- ▁straight +- ▁study +- ins 
+- メ +- ▁pi +- isi +- ▁может +- вы +- ▁model +- ▁warm +- ▁ging +- rie +- sion +- ▁которые +- ▁tree +- 当时 +- ▁ill +- த்த +- ▁cross +- ▁hay +- ▁ад +- dem +- ▁built +- ▁Land +- ▁science +- ▁бол +- ▁J +- ▁bra +- ட +- ans +- 亚 +- ところ +- ▁danger +- 商 +- 努力 +- ska +- ▁kwa +- 啦 +- 每 +- ▁talked +- ▁remained +- 部分 +- 老师 +- 士 +- 室 +- 变 +- sis +- 番 +- ▁paar +- schen +- ▁giving +- gang +- 話 +- そうです +- ▁gonna +- ▁All +- pp +- ité +- ▁quiet +- 让我们 +- ▁rate +- ного +- 转 +- ▁york +- ▁meiner +- 热 +- ▁их +- rus +- チ +- ▁الأ +- ɣ +- ▁tre +- 我是 +- 事件 +- ▁wrote +- ▁picture +- ▁loved +- ip +- yan +- аў +- ▁Diese +- ps +- 整个 +- ▁Ur +- ▁tr +- ▁Ko +- ▁snow +- ▁gemacht +- ▁waiting +- ▁uncle +- ث +- tive +- ▁семь +- 肉 +- ▁那么 +- ▁fort +- ▁yang +- ▁race +- ito +- 失 +- ▁nichts +- ▁For +- ▁london +- ▁кто +- мо +- ▁これは +- ▁minute +- ать +- ino +- ▁boys +- 报 +- ski +- бы +- 千 +- ▁dinner +- til +- ▁gar +- ▁march +- ය +- хо +- 房 +- ▁fourth +- rem +- 边 +- ▁works +- ▁sprechen +- ▁broken +- tus +- 关 +- ▁tatsächlich +- ち +- ▁below +- 有什么 +- ▁đ +- ties +- 感觉 +- ona +- ▁piece +- xi +- ▁similar +- より +- ▁according +- 保 +- ▁simply +- 最后 +- ▁trust +- 往 +- 达 +- なく +- ம +- ▁seinem +- ḥ +- ▁regard +- ug +- ▁hardly +- 如果你 +- ▁zwischen +- ▁direction +- ▁seu +- ▁drew +- 世 +- lä +- ▁mil +- сан +- ▁brain +- ▁corner +- 真正 +- cle +- 种 +- 以上 +- 关系 +- ▁using +- 这里 +- ▁summer +- اس +- ▁buy +- ich +- ▁language +- ▁Ke +- ▁impossible +- ▁sus +- 不同 +- چ +- ше +- eren +- ▁Tu +- ▁bon +- 权 +- ▁hätte +- ade +- ▁Tag +- ▁beauty +- ▁lips +- ue +- 画 +- 第一 +- ▁evil +- ▁Now +- õ +- 单 +- ▁france +- ▁garden +- ▁بود +- ▁month +- bil +- ▁station +- ec +- さんの +- ev +- 进入 +- ▁lead +- ré +- int +- 包 +- 県 +- sco +- 기 +- ▁Ga +- gehen +- ▁denen +- ▁presence +- amente +- ▁girls +- ▁opinion +- ▁touch +- dia +- そういう +- ▁gran +- 的问题 +- ▁fresh +- ▁آن +- 过去 +- ▁united +- 系统 +- field +- ▁Hu +- gra +- 야 +- rit +- ▁mas +- ▁political +- ▁showed +- とは +- ят +- fo +- یم +- ▁ма +- ▁г +- ser +- ▁looks +- ться +- 眼 +- ず +- ▁Frage +- 以及 +- را +- ▁vier +- ▁나 +- 
▁гэта +- ▁placed +- ney +- パ +- pu +- ▁brown +- 冷 +- 発 +- ▁сорок +- ал +- cra +- 根 +- met +- ▁Pe +- ato +- ▁我想 +- 星 +- ava +- ción +- ▁growing +- ei +- ions +- 省 +- ▁risk +- 了一个 +- ▁că +- ▁В +- ▁sick +- ツ +- ▁bank +- ▁переулок +- ▁current +- ▁decided +- ▁soft +- ▁village +- 板 +- ですが +- 夫 +- 我就 +- 今年 +- ব +- ▁filled +- cent +- 青 +- 好的 +- ▁hit +- 実 +- ▁еще +- ▁speaking +- nh +- ih +- ют +- ▁dazu +- ice +- گ +- 其 +- 图 +- ral +- น +- V +- ▁вас +- ර +- ▁hin +- ▁Hi +- ▁Ca +- ▁led +- 来了 +- kon +- ▁ஆ +- ou +- ▁برای +- 装 +- schaft +- ▁wasn +- ▁cent +- ▁har +- ▁muri +- iti +- эр +- ▁weeks +- 上的 +- 找到 +- ▁zich +- ▁pero +- 非 +- well +- ▁inside +- ial +- like +- ▁Sta +- ▁Ihr +- ▁writing +- berg +- ▁mentioned +- 在那里 +- ▁expression +- 出去 +- ▁trade +- 脚 +- ▁born +- fin +- ▁pain +- ▁á +- ▁worked +- ika +- 風 +- ament +- 比较 +- 交 +- ▁twelve +- 这样的 +- ▁type +- ▁cy +- ده +- ▁kan +- ▁Jahre +- ▁revenue +- nte +- ▁rain +- ▁serve +- ова +- ute +- ▁reach +- よね +- ▁H +- oli +- ▁meant +- ▁würden +- har +- 妈妈 +- ▁direct +- ▁இ +- ң +- やって +- ▁plain +- ▁för +- ▁Ru +- ▁pleased +- ▁win +- ▁winter +- ▁influence +- ▁parts +- ▁doch +- isch +- kat +- uk +- ▁mij +- ▁Lu +- 車 +- 你可以 +- ▁naar +- ▁worth +- via +- 这一 +- ▁community +- ▁thoughts +- 突然 +- 》 +- 包括 +- ▁kommen +- hin +- ▁しかし +- ▁raised +- ▁wide +- ▁col +- ▁große +- ▁của +- дзе +- eb +- 传 +- リー +- ▁haven +- ▁chair +- ▁bri +- ▁field +- fall +- 亲 +- ▁umu +- 松 +- 里面 +- wood +- 存在 +- 让你 +- 皮 +- 零 +- ▁boat +- ▁forget +- bro +- ▁exclaimed +- 代表 +- uka +- ded +- 听到 +- '11' +- 血 +- ут +- ▁carry +- ▁chúng +- mmer +- mis +- das +- 华 +- 别人 +- ió +- ▁Geschichte +- ford +- bb +- ▁broad +- ▁od +- ▁paid +- ▁likely +- ▁condition +- ▁ersten +- ▁afternoon +- 以前 +- ▁someone +- ▁grave +- ▁și +- put +- ای +- ▁genau +- 々 +- ▁dream +- сы +- ▁Fa +- ых +- න +- ▁мо +- ▁На +- 说话 +- ком +- ▁ourselves +- ▁waar +- ô +- 草 +- ▁band +- 我觉得 +- ▁content +- ▁wenig +- ora +- 自然 +- 家庭 +- 方面 +- ে +- 黑 +- ▁imp +- ▁industry +- ▁terrible +- dos +- 尼 +- ым +- rig +- ▁report +- ▁somebody +- 
要求 +- ▁allowed +- さんは +- ▁path +- rá +- ▁pale +- 那里 +- ▁appearance +- ssen +- ▁mark +- ▁worden +- umu +- よう +- 学生 +- ments +- мен +- 院 +- ▁க +- أ +- sur +- 円 +- ▁journey +- жа +- ▁usually +- ▁دو +- 卖 +- ную +- ves +- ите +- gli +- 的方式 +- だと +- 调 +- ▁listen +- 警察 +- び +- wen +- ▁Frau +- 兰 +- tik +- グ +- ī +- 温 +- '50' +- 線 +- んですよ +- ▁vast +- يا +- ▁anar +- ▁material +- ە +- ▁generally +- ට +- ▁fait +- ▁enemy +- пер +- ▁area +- ▁struck +- dar +- aient +- 重要 +- tir +- ▁rock +- ような +- 喝 +- 的に +- ▁shot +- ると +- ▁finden +- ▁sorry +- ▁gra +- ▁evidence +- ▁آ +- া +- ний +- 我认为 +- ход +- ula +- sam +- 了解 +- ▁daran +- wu +- uma +- ▁Gi +- ▁Nach +- いました +- ков +- ▁easily +- 件 +- ▁individual +- 节 +- ▁duty +- 座 +- ▁tout +- mie +- 过来 +- ▁những +- ▁unless +- まだ +- 它的 +- ু +- ros +- ▁bill +- ▁fifteen +- heid +- mann +- ▁Her +- lt +- 成功 +- iv +- pri +- اد +- iya +- それ +- lia +- ▁laughed +- ▁шесть +- ▁надо +- ▁clean +- ▁normal +- sar +- ▁ن +- ▁tea +- ala +- ▁kar +- ami +- var +- ながら +- 恩 +- oc +- ▁gesagt +- llen +- 是否 +- ▁meeting +- 纳 +- ▁create +- ▁ஒரு +- ҡ +- ▁Dann +- ▁ré +- ▁products +- ▁Et +- ▁вам +- tis +- ▁fun +- ▁während +- 了吗 +- ▁focus +- ses +- あの +- ▁fond +- ▁sí +- んですけど +- あと +- ▁star +- ▁Weise +- 健康 +- ▁considered +- َ +- ▁não +- 油 +- स +- せ +- んですね +- ▁Weg +- ▁broke +- stra +- ▁moving +- ▁sah +- set +- ▁быть +- 此 +- ▁leaving +- ▁supposed +- stan +- ぶ +- ▁million +- ▁services +- ひ +- ▁charge +- ▁ber +- ▁drink +- ▁test +- ская +- ▁enter +- ▁gerade +- 技术 +- ▁sudden +- 式 +- wr +- ги +- ista +- ▁wonderful +- त +- ▁season +- ▁어 +- '",' +- ▁difference +- ▁jo +- ▁আ +- 未 +- اء +- ▁knows +- ▁geben +- ▁rep +- きました +- ified +- 为了 +- leg +- iri +- ▁mad +- ▁positive +- ▁grace +- ▁пры +- dri +- ▁stone +- なんか +- ▁spent +- 我在 +- ▁everyone +- W +- als +- られる +- ē +- ▁bat +- ▁Kinder +- 修 +- 方法 +- э +- ▁principal +- 怎么样 +- ▁sagt +- ▁iron +- dor +- pt +- ▁том +- ก +- 坐 +- ▁Ve +- アメリカ +- ▁ball +- ▁fünf +- ▁Jo +- ▁Fall +- ped +- ▁energy +- ▁sixty +- ▁honor +- 雷 +- ius +- لی +- ▁min +- رو +- ジ 
+- ▁ganze +- ▁بر +- というのは +- 间 +- чу +- ▁ப +- ▁imagine +- zar +- ţi +- ▁covered +- ▁exist +- 人的 +- ▁latter +- ▁receive +- ▁wollte +- ▁cast +- 任 +- ▁daar +- 不同的 +- nor +- ▁moral +- vers +- ▁global +- ▁fo +- ▁esta +- pro +- 強 +- eu +- ように +- たい +- تر +- ▁natürlich +- ぐらい +- bed +- сти +- uru +- ▁allow +- ே +- ▁closed +- ▁cases +- 离 +- nos +- ▁bound +- ஸ் +- but +- 把它 +- ▁kun +- ▁shut +- 左 +- ▁dafür +- ▁arrived +- ▁huge +- pf +- ▁technology +- ▁besides +- ▁term +- ா +- ット +- ことが +- ph +- 自由 +- 改变 +- ▁Kon +- 服务 +- 二十 +- ニ +- 许多 +- 注意 +- ろ +- ay +- ▁At +- cin +- ▁entirely +- ▁tears +- ですか +- ojn +- 教育 +- ▁particularly +- ▁pod +- ▁девять +- 有人 +- ey +- ▁hill +- 直接 +- 试图 +- ▁ل +- ▁großen +- ão +- ▁Ze +- ▁而且 +- ella +- ▁actual +- looking +- ata +- bli +- по +- のです +- all +- ▁І +- 方式 +- 状況 +- ants +- ▁anti +- pre +- ▁Our +- ▁turning +- ació +- しか +- هم +- 学习 +- ▁ко +- 一天 +- 步 +- yn +- ▁lassen +- んですが +- rt +- yon +- 准备 +- ビ +- 维 +- 药 +- ▁cat +- ward +- ▁percent +- ▁ahead +- ▁notice +- spe +- 离开 +- 人类 +- ▁justice +- ▁sollten +- iga +- ▁spend +- pul +- ▁significant +- ▁fest +- ▁ي +- ▁needs +- 收 +- hand +- ай +- ical +- ▁sand +- だから +- ட் +- лю +- ▁sky +- ▁killed +- 语 +- ▁dos +- ▁bak +- nder +- eva +- 를 +- ▁Beispiel +- rà +- лу +- 留 +- 考え +- ▁holy +- 宝 +- 百 +- ▁Dr +- ▁Pi +- ▁Go +- ส +- ▁europe +- ▁rise +- ▁animal +- های +- 完 +- ▁macht +- ение +- ▁jeder +- ▁labor +- ▁бо +- ▁John +- ▁deux +- 参加 +- ▁island +- ▁kids +- ▁size +- ▁occasion +- cio +- ▁beside +- ぎ +- iki +- ▁park +- ул +- illa +- ви +- og +- 中心 +- され +- 福 +- board +- 島 +- ▁jack +- ▁complete +- 提 +- ▁east +- ▁religion +- ▁note +- ein +- ▁usual +- ture +- ▁horses +- ▁police +- ▁youth +- ▁unserer +- vil +- ▁uma +- ▁wise +- 短 +- ハ +- ón +- ola +- 角 +- tru +- ▁crowd +- ▁george +- ▁welche +- ▁пред +- ḍ +- ▁gute +- ▁silent +- 吉 +- 兵 +- 跑 +- していた +- 影响 +- ム +- qu +- ▁пятьдесят +- 查 +- ▁opportunities +- hr +- ▁tend +- lon +- ▁peter +- av +- になった +- ima +- 师 +- ▁education +- 見て +- 好好 +- ▁К +- ▁« +- лен +- bert +- ți +- 
▁countries +- ボ +- ably +- ▁sam +- ability +- ▁нам +- 你会 +- கள் +- ▁blind +- ど +- ▁trans +- '40' +- ▁восемь +- ▁former +- ▁مع +- vy +- nan +- ▁ただ +- ▁molt +- ▁tut +- ▁су +- ▁“ +- 完成 +- ▁sicher +- ▁prop +- ▁noble +- ▁ubu +- ▁tax +- ▁della +- ▁meinen +- lé +- mun +- 案 +- mba +- 客 +- ▁ziemlich +- เ +- тор +- ▁Hand +- ▁warum +- ▁lange +- ман +- ▁letters +- ▁была +- ▁reading +- ▁Denn +- ▁parents +- 演 +- 配 +- ▁Car +- rei +- ▁attack +- ▁чем +- лся +- teil +- ▁surprise +- 来自 +- にある +- 破 +- ▁heraus +- pon +- 文化 +- 走了 +- ▁aunt +- ▁needed +- ▁ngo +- ▁slow +- low +- 数据 +- ▁offer +- といいます +- 至 +- ▁pleasant +- ▁list +- ett +- arbeit +- 罪 +- ▁entire +- ৰ +- 无法 +- uta +- ▁lines +- sent +- ▁Dies +- 만 +- ▁merely +- ▁cry +- ▁está +- ▁ac +- ierte +- iro +- tha +- mbo +- ▁worse +- ▁pra +- 待 +- kla +- ▁cha +- 另一个 +- huh +- tag +- ▁unto +- があります +- ▁absolutely +- ▁marriage +- tim +- 跳 +- ened +- ▁п +- sia +- 算 +- ▁gesehen +- ▁ни +- ▁original +- ▁этом +- nou +- ▁tall +- ネ +- ▁based +- aka +- ร +- ov +- 管理 +- 似乎 +- ▁ideas +- ador +- mut +- bla +- ▁этого +- 我会 +- ые +- نی +- ▁هو +- ▁store +- 取り +- 全部 +- 也不 +- ▁kandi +- 获得 +- ▁somewhat +- 대 +- ▁kleine +- ▁watching +- 我要 +- 经济 +- ▁forest +- 原因 +- ▁weak +- kommen +- ats +- 問題 +- ▁takes +- шы +- 总是 +- தி +- سی +- ▁steps +- ▁nog +- ▁всё +- rat +- 利用 +- ▁mà +- 一定 +- 政治 +- ▁Or +- ▁đó +- ▁members +- ▁silver +- ▁discovered +- 你在 +- ге +- án +- ▁ع +- 割 +- า +- 好了 +- ▁including +- 科学 +- ский +- tta +- эл +- ▁tan +- ▁property +- тер +- ▁bow +- け +- ▁appear +- 计划 +- gal +- itat +- 通常 +- ▁draw +- 飞 +- 守 +- 掉 +- 谷 +- 未来 +- ▁serious +- ▁bottom +- ils +- rn +- night +- ▁circumstances +- ▁是的 +- vre +- ▁Geld +- اب +- ▁escape +- ▁chapter +- ét +- ▁movement +- J +- ▁proud +- ell +- “ +- ▁fortune +- ▁customer +- そ +- nden +- tung +- ▁iz +- лы +- ▁breath +- ů +- ▁san +- 满 +- ▁ph +- ▁Jahr +- werk +- 发生 +- 好像 +- ▁Wo +- ▁وال +- ▁thick +- ▁mountain +- 党 +- tz +- 可能会 +- ū +- ▁changes +- ения +- ав +- ▁sondern +- 故事 +- бо +- ი +- ந்த +- ケ +- ▁prepared +- ▁sad +- ▁Stadt 
+- ▁completely +- 母 +- 感染 +- 迪 +- ▁fish +- dis +- ▁mort +- ete +- до +- ▁points +- 击 +- ▁besser +- ▁gw +- oni +- rwa +- ▁dels +- oma +- ев +- ▁increased +- ▁bij +- ▁america +- ▁Herr +- । +- ▁instant +- ica +- ▁Moment +- ▁Über +- ▁где +- rin +- ▁erste +- ▁named +- ரி +- 如 +- モ +- ▁conditions +- 停 +- ▁guard +- 리 +- lic +- ▁здесь +- ▁Ya +- bit +- ис +- ස +- 共 +- tek +- 夏 +- ep +- ын +- ُ +- ▁mijn +- これは +- ظ +- 几乎 +- ட்ட +- 精 +- ▁effort +- ▁judge +- 仍然 +- ▁habit +- ▁credit +- sim +- 牛 +- ▁jesus +- 江 +- ▁тем +- ▁afterwards +- ม +- ▁ok +- lung +- 信息 +- 追 +- ▁cal +- най +- ▁sono +- 皆さん +- ное +- ▁gives +- aga +- 忙 +- '12' +- ▁advantage +- ně +- rum +- 谈论 +- itu +- ray +- 毛 +- 素 +- ret +- ▁research +- ▁meaning +- ator +- ▁zou +- ці +- lay +- ック +- ▁Grund +- ▁glass +- ▁nobody +- ▁video +- kle +- ▁Ende +- gy +- 程 +- 感到 +- イン +- 养 +- හ +- ▁Ort +- ▁cash +- hir +- ▁count +- ▁inform +- ▁また +- ▁neue +- encia +- 歳 +- lü +- ▁taste +- ▁porque +- ▁box +- ▁fin +- ற +- ことを +- ân +- ▁watched +- ▁college +- ▁막 +- ▁York +- ▁того +- ▁storm +- ▁mass +- みんな +- 约 +- 我们在 +- ▁weather +- 拍 +- े +- å +- 排 +- ▁scene +- ▁laugh +- ▁dry +- ▁total +- 确实 +- ということ +- zin +- 对于 +- ▁beiden +- ▁memory +- ▁się +- ▁Els +- ▁madame +- 其中 +- '15' +- তা +- mak +- ▁events +- 'off' +- ▁greatest +- тай +- ▁aba +- 界 +- ping +- 型 +- press +- ▁Sache +- ▁speech +- ▁pick +- ados +- ▁played +- ▁companies +- idad +- ▁passage +- 下来 +- こんな +- ▁weg +- ▁benefit +- ▁alt +- ▁War +- 退 +- light +- 视 +- ▁economic +- ▁тут +- ▁edge +- ▁international +- gue +- net +- 晚上 +- 照 +- 医生 +- ▁Bar +- ева +- ▁record +- bol +- ロシア +- zel +- 屋 +- ▁fallen +- ▁tak +- ▁animals +- 超 +- ż +- ほど +- ▁تا +- odd +- ▁我是 +- ▁kill +- 菜 +- ක +- ille +- ación +- ს +- 今日 +- ▁app +- ▁tone +- cos +- ▁shore +- ▁balance +- abo +- ▁إلى +- ▁dia +- ▁solid +- ın +- 香 +- 勝 +- 思い +- wed +- ো +- 肯定 +- zy +- ری +- ぐ +- ▁denken +- ux +- rang +- kul +- äh +- ▁spread +- kar +- ノ +- ily +- table +- ▁moon +- 班 +- 娘 +- 端 +- ад +- over +- ua +- эн +- じゃない +- 昨日 +- なら +- wing +- 
服 +- die +- ще +- 因 +- ▁için +- ▁sharp +- tem +- いく +- ▁без +- ▁jeden +- sk +- 苦 +- vad +- eta +- fre +- 시 +- ▁comfort +- ப +- mb +- ɛ +- ран +- 云 +- 回到 +- ▁understood +- 機 +- ▁الت +- ▁Haus +- 称 +- 树 +- ガ +- ▁ihrem +- ▁های +- ▁можно +- ▁search +- iko +- тра +- vé +- 费 +- ▁bag +- られた +- ▁honour +- 经常 +- ▁tom +- ría +- டி +- 無 +- ▁casa +- 洗 +- テ +- ▁bekommen +- ▁shape +- 必要 +- rent +- 望 +- ▁fla +- 非常に +- ▁cover +- ▁даже +- 井 +- 員 +- 换 +- ▁kommt +- ved +- ちゃん +- 武 +- 是个 +- ▁exp +- ▁interested +- ▁prove +- 나 +- qua +- ▁ben +- 全国 +- ▁believed +- ▁fixed +- म +- ▁brave +- law +- ▁surface +- рэ +- ▁double +- ▁reply +- 場 +- प +- dir +- ني +- ▁Daten +- ▁dress +- ▁Gefühl +- от +- pped +- 的时间 +- ▁song +- some +- ▁sau +- ▁stock +- ▁If +- मा +- ▁nó +- ara +- ย +- ▁laws +- ▁square +- ▁bru +- ير +- ここ +- ▁gehört +- ▁professor +- ▁emp +- ▁excited +- ▁لل +- ▁Unternehmen +- stone +- 玉 +- 市场 +- ▁finding +- ▁sing +- ith +- ▁environment +- gg +- ▁schnell +- enden +- gera +- 最终 +- 刘 +- ▁popular +- ▁ont +- ▁ancient +- স +- 联系 +- asi +- 帮 +- ▁royal +- 右 +- ▁pie +- став +- 身上 +- ▁leben +- стра +- ▁seventy +- ▁parte +- 个人 +- ▁throughout +- ▁Fi +- ▁いや +- ▁ces +- 大きな +- ер +- ▁smoke +- 求 +- bri +- 基 +- ນ +- டு +- ▁markets +- ▁These +- ため +- ▁lake +- '0' +- ▁bal +- ▁mere +- 接受 +- ▁driven +- 密 +- 登 +- 之间 +- ▁persons +- ▁người +- ▁explain +- ▁seat +- ▁putting +- ▁af +- 鱼 +- мы +- imi +- ார் +- ▁pos +- ▁telling +- ▁час +- ▁其实 +- ▁بال +- 龙 +- ▁region +- ▁center +- ▁Ĝi +- 系 +- ▁walls +- ▁century +- ▁mor +- ▁mid +- ว +- ▁club +- nel +- 便 +- ▁pat +- ▁range +- ▁determined +- ering +- ▁Han +- ▁prevent +- ▁author +- ப்ப +- ▁sold +- 呃 +- ▁எ +- 音乐 +- ▁civil +- ▁bas +- com +- aŭ +- 黄 +- ▁accept +- ▁stage +- din +- ▁narrow +- ▁foreign +- ob +- ▁neck +- 害 +- ▁رو +- ▁issue +- hor +- 解决 +- 发展 +- tig +- ▁production +- ▁yellow +- ▁soll +- 证 +- ▁ladies +- ▁hurt +- ▁perfectly +- ▁fill +- und +- 各 +- tant +- 法律 +- 就会 +- jen +- 杀 +- ол +- 告 +- их +- ▁german +- ▁你知道 +- tura +- 跟我 +- 付 +- ▁plant +- 頭 +- ▁wel +- ина +- 
想要 +- 大会 +- ِ +- мі +- вед +- ▁approach +- recht +- gas +- ▁access +- ▁St +- ▁familiar +- ▁Ce +- 去了 +- ▁proper +- 常 +- ▁tri +- との +- ▁hotel +- 伤 +- শ +- ▁Dar +- ▁learning +- ▁Ju +- 塔 +- ▁numbers +- ▁fan +- ▁bla +- bin +- ast +- とも +- rar +- 有很多 +- ▁stick +- ual +- ▁hebben +- gestellt +- ▁因此 +- ী +- pin +- ත +- ▁students +- 対 +- lli +- 站 +- りました +- 投 +- ▁wished +- ▁paris +- ▁iki +- ▁offered +- ▁19 +- 关注 +- ▁cloud +- тар +- ▁grass +- 城市 +- سا +- rec +- ур +- war +- ▁observed +- ▁هم +- ▁tam +- ii +- 之一 +- ▁favor +- れば +- ▁meinem +- 乱 +- ته +- ▁confidence +- ▁agree +- 父 +- 女人 +- cour +- ?" +- でしょう +- 少し +- ▁som +- twa +- osa +- ea +- ▁keinen +- له +- ▁degree +- ▁который +- ▁beneath +- 秋 +- ▁était +- فر +- ▁mental +- ▁Problem +- work +- 場所 +- tā +- eux +- biri +- ▁marry +- 微 +- ün +- ged +- ▁quality +- ▁Seite +- ła +- 结果 +- лись +- ▁firm +- ▁baby +- 奇 +- ları +- rah +- ▁clearly +- ▁oil +- 做的 +- ▁promise +- ▁roman +- ▁한 +- gin +- lid +- 伊 +- チーム +- れる +- '100' +- ද +- ▁farm +- ▁человек +- ▁finished +- ًا +- ▁darkness +- ego +- leri +- lum +- ▁clothes +- ্ +- '60' +- 死亡 +- 久 +- مان +- цца +- 保持 +- っと +- 迷 +- 俺 +- ▁produce +- ▁happiness +- ▁Elle +- ▁rule +- ▁کار +- 是我 +- みたいな +- ▁playing +- ▁gas +- 列 +- ▁шестьдесят +- 节目 +- たら +- tet +- uz +- ane +- ▁تو +- وم +- को +- 家族 +- ▁golden +- ▁あっ +- 相信 +- ▁உ +- ▁rein +- 选 +- ▁scarcely +- ▁seit +- 你就 +- 几个 +- 友 +- 一般 +- ▁equal +- ▁knowing +- 回家 +- tern +- ▁Ex +- ச +- 役 +- 回答 +- 果 +- ▁management +- ▁lose +- ール +- ná +- ▁brand +- ▁sal +- 啥 +- ▁könnten +- ▁可是 +- зна +- ▁opening +- 試合 +- ター +- 人生 +- morrow +- ▁drawing +- デ +- ▁operating +- ▁bạn +- ▁profit +- ▁Gra +- ▁paul +- 让他 +- rak +- оч +- glo +- ette +- ß +- ▁loss +- ▁stories +- ▁wine +- ▁sell +- があった +- 电话 +- hm +- 脸 +- ▁attempt +- ▁address +- ▁bay +- uku +- 町 +- ▁leaves +- ▁courage +- spir +- ▁media +- ▁leading +- ▁schwer +- ▁ses +- 的小 +- ▁efforts +- ▁soldiers +- ▁surely +- ▁San +- hel +- lip +- rü +- iger +- ▁Hier +- ві +- pot +- 电影 +- ▁är +- ▁passiert +- ▁Але +- än +- ▁etwa +- 
他在 +- 春 +- ow +- non +- ▁produced +- ▁flowers +- ▁valley +- ▁earlier +- ▁Ik +- cie +- 人は +- ▁holding +- ▁eigenen +- 理由 +- لي +- ▁Mr +- ▁sword +- ▁supply +- bot +- ▁được +- ▁weight +- 副 +- ▁さあ +- 주 +- 网 +- 视频 +- 妈 +- ி +- ▁check +- ▁walking +- ▁flu +- 連 +- 告诉我 +- ▁Europe +- 枪 +- ▁additional +- 这一点 +- ls +- ▁pla +- ிய +- 助 +- pit +- ▁fra +- 因为我 +- ena +- ification +- eau +- ako +- ▁choice +- による +- 骨 +- бу +- ▁vain +- 自分の +- ▁decision +- cing +- ▁如果你 +- 背 +- ▁Б +- ▁gro +- 라 +- ▁powerful +- 弱 +- 地区 +- лан +- 陈 +- きた +- 拥有 +- 東 +- 这样做 +- ▁welcome +- ▁recent +- lam +- 残 +- ▁happens +- ▁structure +- 在这个 +- fan +- lit +- وی +- ▁formed +- ▁han +- ▁speed +- ddy +- zione +- ▁meisten +- ా +- 是在 +- '14' +- 狗 +- lí +- 로 +- ▁gli +- ▁seek +- 一番 +- 组织 +- rio +- ▁nation +- tat +- ▁houses +- ▁travel +- ▁uz +- ▁тоже +- ▁bird +- ▁passion +- ▁directly +- 去年 +- ▁그냥 +- である +- ious +- lı +- mul +- вод +- ▁basis +- 明天 +- прав +- ▁Can +- ▁platform +- ▁як +- aire +- どんな +- 的这个 +- cker +- The +- ists +- 治 +- gna +- serv +- ▁threw +- ▁dropped +- ▁divine +- ▁deze +- ▁그런 +- 这次 +- んですか +- 读 +- ▁flat +- ง +- ▁Buch +- 考 +- ир +- uza +- 考虑 +- dol +- න් +- ▁deliver +- ак +- vá +- かった +- 는데 +- ▁شد +- スト +- 」。 +- mat +- fully +- ugu +- hör +- ▁military +- ▁letzten +- 睡 +- ▁birds +- ▁sake +- ▁lying +- ください +- ▁Fe +- жи +- ▁required +- ▁existence +- ▁ничего +- ▁addition +- 横 +- 気持ち +- ▁contract +- 爸爸 +- ् +- ள +- バー +- stu +- из +- bie +- бе +- ▁passing +- 不了 +- try +- 若 +- ▁ibi +- ▁declared +- セ +- 唱 +- ▁Par +- уч +- ▁конечно +- ▁эти +- ▁anybody +- 身体 +- ▁coast +- aux +- tum +- きょう +- си +- ▁cri +- কে +- ▁내가 +- ныя +- ▁available +- rich +- क +- 建 +- '18' +- ▁glance +- ▁track +- ▁unser +- 尽 +- ▁page +- بر +- ること +- bl +- 良 +- chten +- mic +- ▁understanding +- ▁orders +- ▁practice +- 源 +- су +- 带着 +- ▁seva +- ▁wichtig +- vas +- sin +- 三个 +- bau +- zwe +- ole +- kuru +- 愿 +- 人家 +- stat +- mond +- こちら +- she +- ▁你们 +- ▁william +- ▁caused +- stein +- ▁christian +- ible +- ient +- ▁Mas +- ▁catch +- cas +- 
▁Wasser +- ▁guy +- ction +- 時に +- ▁uno +- ▁vint +- 脱 +- 轻 +- ▁eighty +- ▁allen +- 封 +- ▁şi +- ▁host +- ▁fly +- ▁– +- rate +- rick +- ▁Д +- gil +- uko +- ▁bedeutet +- ▁hung +- ▁ears +- ▁prison +- ▁più +- ▁ф +- ▁двести +- more +- 例 +- ▁muy +- 前に +- ▁text +- ▁centre +- ▁presently +- 熱 +- 情况 +- ▁wahrscheinlich +- gle +- ポイント +- によって +- ▁Augen +- вал +- genommen +- 普 +- อ +- chan +- gre +- 社区 +- ▁že +- ▁này +- 托 +- ▁curious +- になります +- ön +- 해 +- ▁larger +- jar +- ▁этот +- ▁pel +- ін +- ▁shows +- ▁voi +- 新的 +- つの +- geben +- 类 +- 夫人 +- 处理 +- ket +- ▁instance +- ▁colonel +- ▁Okay +- quer +- ungs +- ▁net +- ▁indi +- 比如 +- 年前 +- thi +- 错 +- ▁ре +- すごく +- nne +- 第二 +- ▁hath +- 자 +- க்கு +- 及 +- ▁pr +- 这是一个 +- 首先 +- igkeit +- ▁population +- 면 +- ▁pentru +- 了我 +- cen +- ▁vision +- 的事 +- ▁accident +- じゃ +- ▁matters +- ▁elle +- 解释 +- 返 +- ▁richtig +- 也许 +- 群 +- व +- 极 +- ▁spoken +- ▁indian +- ▁kwi +- ▁coat +- ▁hearing +- っています +- ▁gets +- ざ +- nic +- 级 +- ▁shown +- ▁هذا +- ▁hospital +- kes +- での +- би +- ロー +- ी +- ▁officers +- ▁port +- ais +- 一切 +- ग +- ▁kleinen +- ▁statements +- 回去 +- ▁cro +- 刚 +- ▁Bei +- 데 +- ative +- ▁gre +- ним +- '19' +- 不好 +- 办 +- 乐 +- 沙 +- ▁shadow +- ہ +- ▁fool +- 团 +- ▁今天 +- vert +- ▁calm +- 担心 +- bye +- ▁motion +- stru +- プ +- 現在 +- ▁ook +- ▁faire +- ▁aux +- ▁thin +- ▁basically +- まして +- ▁shook +- 事故 +- 保护 +- গ +- ▁henry +- ▁member +- lis +- ▁thanks +- ▁aware +- ▁время +- あります +- されました +- eri +- ▁opposite +- ო +- ▁pocket +- ▁task +- ▁willing +- ▁successful +- فت +- 秒 +- ▁bread +- uw +- চ +- ▁ama +- 变得 +- ▁tem +- なので +- ▁trong +- ▁extra +- ▁religious +- ▁люди +- ▁strategy +- ▁Har +- یه +- ▁remains +- ▁cousin +- ус +- ▁إ +- 票 +- 挺 +- などの +- 不用 +- ぞ +- ▁fre +- ですよ +- ‘ +- 游戏 +- ▁beat +- ese +- dde +- 而不是 +- 苏 +- ▁guys +- ▁Aba +- ▁central +- 最高 +- それを +- ches +- jan +- 容 +- ▁officer +- pic +- ▁wants +- ched +- ▁related +- ▁bw +- ふ +- ▁noise +- 杰 +- ▁possession +- 開 +- lat +- ▁pure +- そんな +- ▁Person +- 造 +- ▁nti +- 語 +- ▁otherwise +- ▁gray +- ▁carefully +- 
▁sogar +- 産 +- вид +- 一年 +- 项目 +- ண +- ග +- ▁thể +- kom +- ние +- ด +- 個 +- ▁daily +- 衣 +- ▁comment +- ▁我觉得 +- mek +- ෙ +- ors +- 咱 +- ▁10 +- 穿 +- bat +- ▁areas +- mme +- vol +- ś +- ▁pres +- ▁sum +- 防 +- cause +- ですから +- ற்ற +- ▁stream +- ▁今 +- ▁были +- 手机 +- 段 +- igi +- ▁ear +- ▁scale +- ▁activity +- ▁drawn +- uh +- ▁smiled +- ▁Oder +- 出现 +- 控制 +- pat +- ▁conduct +- られ +- istic +- ▁ninety +- ston +- ▁surprised +- ▁са +- ▁cas +- ▁engaged +- pie +- ▁你说 +- ▁Ent +- nc +- ▁fate +- 動 +- 相当 +- ▁policy +- 你想 +- ▁Form +- ▁شما +- あった +- ரு +- ▁aid +- ▁bur +- nar +- ▁repeated +- ▁jedoch +- 休 +- ▁throw +- 给他 +- 才能 +- ▁excellent +- 結構 +- plo +- ▁event +- ▁Ben +- ign +- 我们可以 +- ▁семьдесят +- دي +- ▁больше +- 渡 +- eh +- ▁apart +- 快乐 +- али +- 改 +- ▁neu +- ▁district +- ij +- sed +- 男人 +- ▁gun +- kte +- lem +- ▁versch +- 应 +- ▁এ +- يد +- 《 +- ▁ay +- мер +- 様 +- ▁Ye +- 层 +- ▁fue +- dat +- 饭 +- ismus +- kä +- ▁spite +- ▁dire +- ▁skin +- ▁castle +- sz +- hl +- ▁خ +- ▁bil +- ▁My +- lig +- ▁digital +- 已 +- ▁dick +- ும் +- ▁monsieur +- ▁species +- ▁О +- iku +- ▁machine +- 愛 +- ▁reform +- ▁gentlemen +- ▁fur +- '16' +- ▁regular +- ши +- far +- ु +- ▁mountains +- ▁fat +- ć +- ас +- ▁på +- ▁fruit +- ierung +- ▁correct +- 直到 +- 分享 +- ▁difficulty +- ▁نه +- こちらの +- ি +- ▁Spiel +- ▁image +- setzen +- ▁review +- ▁todo +- ▁getan +- 楼 +- об +- heim +- ım +- ▁hoe +- ï +- ດ +- вар +- ▁advance +- 放在 +- dic +- 底 +- ▁reality +- 根据 +- ▁portion +- 強い +- sor +- ска +- ▁ausge +- ▁commercial +- ▁created +- ▁spa +- 不想 +- tí +- če +- ▁还有 +- ela +- ном +- ▁temple +- 同じ +- ポ +- ▁anyone +- 姐 +- ▁보 +- 持 +- ▁keeping +- udi +- ▁message +- 看着 +- ල +- 数字 +- row +- یت +- ▁später +- isa +- 并不 +- 上帝 +- ▁crime +- ▁forced +- ▁noticed +- ▁tired +- ▁choose +- ▁pieces +- ▁đã +- 想象 +- ソ +- place +- 活动 +- ▁creature +- dom +- ▁union +- 茶 +- 父母 +- 更多的 +- 兄弟 +- legen +- ▁wahr +- '25' +- 初めて +- кой +- fra +- ques +- anya +- 我说 +- ник +- 没事 +- ずっと +- ▁proved +- ▁plans +- gh +- ▁legal +- 每个 +- ▁improve +- لو +- ▁pride +- ▁bevor +- 
午後 +- 组 +- ▁erhalten +- ▁знаю +- ゴ +- hy +- ▁عن +- ▁ту +- ▁كان +- 아 +- 大人 +- nem +- ▁suffering +- rad +- 楽 +- ▁Well +- ▁кон +- ▁duke +- ダ +- ▁served +- ▁sol +- ša +- пи +- 我可以 +- ys +- ạ +- yle +- 小姐 +- ▁dachte +- fel +- ▁obviously +- 再次 +- ▁suit +- ▁fully +- ▁frank +- 作品 +- ▁Kar +- ピ +- 性的 +- kim +- 双 +- こういう +- ▁bridge +- pus +- mont +- ▁eh +- 除 +- ▁res +- ▁mention +- ▁sounds +- されている +- ێ +- 有些 +- 限 +- 情報 +- ▁لا +- its +- ▁blow +- 精神 +- 做了 +- ▁été +- ▁然而 +- ▁だから +- nye +- kwa +- ▁peculiar +- ore +- ▁burst +- ▁planet +- ▁faint +- ▁jemand +- 样 +- сто +- 伯 +- ▁nineteen +- ▁tur +- ▁asking +- ▁pounds +- ▁bringen +- んですよね +- ▁allem +- 的工作 +- bona +- ري +- ▁starting +- 明白 +- ヤ +- ▁press +- 款 +- ▁avait +- ▁pray +- 圣 +- 可能性 +- ▁gij +- 紧 +- ▁height +- 女孩 +- 告诉 +- nova +- kir +- eɣ +- 梅 +- parti +- ▁bereits +- ▁att +- ▁අ +- wel +- ▁david +- ▁Thank +- ▁measure +- ▁date +- ▁진짜 +- ▁nom +- fru +- 痛 +- stro +- gt +- 引 +- てる +- hood +- নি +- dal +- ▁forma +- 我也 +- 余 +- ิ +- ▁servant +- 结 +- ங்க +- fle +- ▁stranger +- ву +- wana +- '17' +- ▁Frauen +- ▁blo +- ric +- ▁Tom +- auf +- ней +- ▁enjoy +- 料 +- 書 +- 한 +- してる +- ▁Unter +- ▁moi +- wie +- nni +- 他说 +- ▁Nu +- ▁names +- ult +- ▁provided +- ▁breast +- vez +- 政策 +- ▁palace +- となる +- 大约 +- ल +- লে +- ▁costs +- ▁tap +- тан +- amu +- 男性 +- 而且 +- ▁devil +- ▁freedom +- ▁charles +- ھ +- 存 +- ▁authority +- bone +- mble +- ▁defend +- ▁hoch +- pens +- ▁бар +- ▁reasons +- しい +- 业 +- ▁Sam +- ▁sechs +- ません +- ▁fancy +- ▁fashion +- 毒 +- ос +- ▁intention +- ▁alive +- 倍 +- py +- 静 +- 今回 +- ▁pair +- 设计 +- ▁impression +- 儿子 +- ör +- ▁sex +- 伦 +- ellen +- ▁cyane +- ▁desert +- ▁gentle +- 认识 +- ▁bell +- ۆ +- 잖아 +- ала +- 多少 +- 压 +- 都有 +- ▁style +- ṛ +- ▁forms +- ▁anxious +- ▁teach +- 哦 +- 词 +- ▁Pu +- 更多 +- vent +- 的生活 +- ▁genug +- 怕 +- 很好 +- род +- voll +- ▁sup +- ▁Menge +- 假 +- 方向 +- mark +- ▁circle +- ▁weit +- ▁тебя +- ddi +- cul +- 自分 +- ▁twice +- তে +- 政 +- قا +- ▁ordered +- ▁gla +- ▁mer +- ▁honest +- ▁сказал +- のお +- ▁Zi +- ▁stra +- ▁possibly 
+- ▁angry +- ▁س +- ▁council +- sol +- loo +- dam +- ▁vida +- 能力 +- gara +- ši +- ▁brief +- ▁کرد +- eln +- 基本 +- ▁лет +- ró +- 打开 +- ▁20 +- 犯 +- nik +- br +- ▁leur +- ▁gy +- mine +- ration +- uli +- ▁ocean +- ▁origin +- ▁Sant +- ▁З +- ▁geen +- icht +- 英国 +- ▁patient +- ▁ordinary +- ▁bent +- ැ +- ▁british +- alt +- ▁ای +- ▁importance +- ▁qua +- ▁són +- 喜 +- vie +- ▁яго +- 多くの +- ▁legs +- ство +- ния +- kins +- ▁university +- long +- ▁вопрос +- ▁eigentlich +- ▁юм +- ▁hören +- 好吧 +- shed +- lijk +- rag +- 姆 +- 令人 +- 인 +- 结婚 +- ▁rising +- дер +- leid +- ▁Vater +- ▁culture +- ▁或者 +- ▁tongue +- ▁murder +- els +- はい +- '200' +- 当中 +- ▁Kopf +- arm +- ▁somewhere +- ▁volume +- ▁heads +- ega +- ▁qual +- ▁với +- گر +- nnen +- 清楚 +- ▁safety +- halt +- ▁ganzen +- কা +- ▁zo +- ades +- 支 +- pend +- 同时 +- 嗯 +- 스 +- ▁Mutter +- 激 +- 選 +- تی +- сть +- ví +- pet +- ▁problems +- ▁agreed +- ▁physical +- ▁investment +- 内容 +- ▁truly +- gus +- wal +- ▁row +- わけ +- ि +- ▁security +- iĝis +- hab +- ээ +- ▁fe +- ▁тебе +- ology +- ▁hate +- ▁shoulder +- ▁forgotten +- ord +- ▁breakfast +- ▁rode +- пла +- ▁seventeen +- ▁那你 +- ▁taught +- ▁ему +- 你是 +- ▁roof +- vir +- ▁aim +- ▁cook +- 迫 +- tä +- ▁med +- ▁mistake +- ▁promised +- ▁много +- ▁dance +- 运动 +- ▁сам +- ▁Cha +- されています +- pun +- ود +- ▁China +- ыш +- ▁join +- še +- geb +- 感谢 +- ▁versuchen +- ▁waited +- elijk +- ▁exercise +- ▁roll +- ight +- uda +- ▁ability +- ▁anne +- rou +- ▁altogether +- ▁dare +- izi +- ▁remembered +- plan +- 息 +- нов +- 氏 +- гүй +- люб +- 主要 +- ▁Jetzt +- ▁kor +- ▁би +- ▁calling +- fl +- ▁làm +- ▁Ch +- lent +- ▁fingers +- ▁dangerous +- சி +- ▁weniger +- ▁hart +- 不可 +- ▁feelings +- аль +- ▁Tra +- ▁kuri +- ▁target +- வா +- ていた +- ▁потом +- old +- 三十 +- ▁tent +- ▁salt +- part +- 质 +- jn +- けれども +- ▁failed +- 这不是 +- ▁glory +- ule +- 每天 +- از +- ▁восемьдесят +- 永远 +- ▁chose +- uit +- ▁そう +- اق +- ▁бу +- 曾经 +- ▁streets +- 汉 +- கி +- ▁specific +- nje +- ▁wil +- раз +- 関係 +- any +- を見 +- ▁вообще +- tch +- ▁terror +- tausend +- すること 
+- 一位 +- 税 +- ▁bist +- ға +- 跟你 +- ▁empty +- lief +- 景 +- ▁helped +- ▁method +- ▁james +- 刚刚 +- ▁vielen +- zon +- uni +- bili +- jā +- stellen +- ▁rot +- ▁quietly +- bon +- ねえ +- ▁famous +- pping +- パー +- rim +- 社 +- gor +- ▁drop +- 一条 +- ▁fil +- ▁auto +- 调查 +- urs +- ▁express +- zig +- ▁grant +- ziehen +- ▁active +- 都会 +- 消 +- ▁wear +- ▁после +- bal +- 婚 +- 亮 +- ▁байна +- iye +- ▁пре +- য় +- ▁princess +- ▁limit +- 后来 +- 万円 +- ld +- ▁ха +- ło +- ▁wij +- 生命 +- keit +- の中で +- ائ +- ▁ச +- ▁bul +- 探 +- ▁Für +- ▁kingdom +- cke +- ▁series +- ▁sufficient +- 越来越 +- ▁busy +- ▁grown +- бор +- '13' +- 有一些 +- ▁Мы +- ▁aussi +- tika +- 企业 +- ▁các +- 行动 +- ▁issues +- wert +- bus +- ▁rapid +- зы +- 告诉你 +- ூ +- ▁Za +- ▁شده +- ▁такой +- 赛 +- eli +- もある +- rz +- ▁delight +- ▁sang +- なんです +- ▁struggle +- 堂 +- 在我 +- ▁online +- ▁그거 +- tori +- ▁explained +- ño +- 軍 +- про +- ▁fault +- aw +- ▁deg +- প +- ім +- rom +- ▁ended +- ▁guide +- 目前 +- ート +- ▁signal +- ▁heat +- či +- ▁flow +- 日本の +- ▁そうですね +- phy +- nin +- 念 +- 旧 +- tto +- ▁aside +- ▁gr +- û +- ium +- ▁그래서 +- ▁liegt +- 散 +- ▁És +- ▁potential +- ▁yesterday +- ▁rights +- ▁девяносто +- ▁false +- gia +- 医 +- يل +- nze +- 違う +- ▁pré +- ▁sorrow +- ▁highest +- 医院 +- sp +- されて +- ▁hinter +- ▁Wer +- kā +- ▁extremely +- ▁Dia +- ▁experiment +- Laughter +- иться +- ▁judgment +- 网络 +- ▁exact +- ▁сегодня +- اف +- ▁fel +- ▁hell +- 你说 +- aya +- 绝对 +- ▁dev +- ▁verstehen +- 駅 +- ▁listening +- おいしい +- 第一次 +- 的孩子 +- ▁strike +- ▁connection +- ▁sentence +- ▁income +- 赤 +- 居 +- ล +- ▁European +- ▁kuba +- ▁pou +- ▁greatly +- ▁Па +- 房子 +- ▁described +- ского +- したい +- ກ +- ▁klar +- ▁recently +- 萨 +- ▁priest +- 旅行 +- ి +- ▁margin +- ое +- ▁pressure +- ▁touched +- ▁trial +- haus +- ▁separate +- ▁network +- ▁refer +- وي +- était +- tle +- ▁fix +- ▁previous +- ▁sides +- 요 +- 出了 +- 降 +- ▁companion +- uf +- uff +- спе +- 洛 +- dio +- ▁때 +- ▁Fragen +- 真是 +- ▁vers +- ▁де +- 那种 +- ▁older +- ▁disease +- ▁почему +- 大的 +- ▁angel +- lá +- нем +- ▁zehn +- ▁beim +- 進 +- 
▁settled +- হ +- oso +- ude +- ▁gay +- rung +- ▁ride +- 合作 +- ▁grande +- ются +- horn +- cies +- ▁becomes +- ym +- 剧 +- pla +- 置 +- からの +- দ +- tom +- 你要 +- ▁eleven +- ▁When +- bs +- ▁людей +- ▁meer +- یا +- ْ +- ▁map +- ▁gleich +- ▁apparently +- ▁naturally +- ства +- ҙ +- 那是 +- gun +- دی +- 很难 +- 来的 +- ▁sage +- ime +- ▁onder +- 英 +- ▁гор +- ▁Prozent +- 致 +- ming +- ▁superior +- aus +- tă +- ▁имени +- sem +- 游 +- ウクライナ +- ▁Lebens +- 即使 +- ▁hun +- ▁intended +- ▁philip +- lio +- ▁sixteen +- сон +- ▁involved +- ыя +- эд +- ился +- ▁liked +- ▁capacity +- 诺 +- まず +- 轮 +- ▁vie +- ▁Ci +- iste +- wand +- ана +- ▁interview +- ▁troops +- ▁Auto +- eye +- antes +- лась +- ▁même +- ées +- ▁belief +- ້ +- 一家 +- gem +- cara +- າ +- nal +- ▁него +- ▁title +- nge +- 我们将 +- 参与 +- 你能 +- 机会 +- 除了 +- 产 +- amos +- ▁bald +- ▁release +- ▁corn +- 谈 +- 粉 +- 创造 +- 親 +- 冲 +- でしょうか +- ▁ice +- ▁مت +- ▁driving +- ▁stars +- щ +- rry +- ▁student +- hundert +- ▁highly +- 展示 +- 止 +- ▁vote +- ▁shop +- ht +- nehmen +- ande +- нд +- ▁operations +- eller +- inde +- ash +- ек +- lot +- ruh +- ա +- ▁career +- ct +- ▁definitely +- ▁chamber +- ▁argument +- ▁happening +- tier +- fr +- phi +- bes +- 晚 +- гийн +- ▁sans +- 讨论 +- ono +- ▁admit +- 抓 +- ましょう +- olo +- zwa +- 料理 +- ila +- ▁ap +- той +- 収 +- ε +- ▁avoid +- burg +- ▁thrown +- vä +- 実は +- 校 +- ив +- によりますと +- ▁todos +- ▁contra +- 架 +- ▁aller +- 整 +- やはり +- ▁flo +- 富 +- ▁suffer +- ▁shoulders +- 认 +- ▁minister +- ▁Th +- gro +- ès +- ▁match +- ▁ந +- tud +- 滑 +- ▁bereit +- spiel +- tür +- 俩 +- 期待 +- ▁concerned +- 弄 +- ▁windows +- 一定要 +- ийг +- ▁harm +- ▁cup +- ori +- hora +- 末 +- বে +- ज +- ario +- ▁thousands +- ▁kennen +- тур +- লা +- ▁pointed +- ▁nay +- ▁His +- get +- ▁ideal +- 下去 +- 它是 +- ži +- 箱 +- sing +- rd +- '80' +- ▁fairly +- ▁standard +- ▁ее +- 只能 +- ▁nose +- 的故事 +- lus +- kal +- iamo +- ▁spiritual +- ▁해 +- ▁Pri +- 田さん +- dale +- ▁carriage +- яв +- رس +- ▁verwenden +- ▁kinds +- 要是 +- ▁Gott +- 呼 +- tten +- 块 +- ▁loud +- ▁helfen +- ▁joined +- 
▁себя +- ▁bro +- 吃饭 +- ▁kid +- 索 +- 하는 +- 报告 +- vit +- 可能是 +- ▁novel +- tain +- 县 +- 不管 +- ▁weitere +- gam +- ejo +- anza +- stri +- ▁claim +- reich +- ▁excuse +- த் +- வு +- 复 +- 叶 +- 就在 +- ▁fighting +- ▁mari +- пу +- ▁ones +- ▁temps +- 猫 +- ▁affairs +- gwa +- ust +- kana +- ▁wealth +- 症 +- 重新 +- ▁useful +- dé +- 百分之 +- ▁presented +- ▁flight +- 鲁 +- kam +- 灯 +- ▁grey +- cut +- ▁пад +- 记 +- 每个人都 +- cz +- ▁papa +- ique +- ▁fri +- rik +- できない +- ▁mix +- lock +- ▁loose +- ность +- fort +- ▁Cor +- بي +- ▁niemand +- ян +- ▁cuando +- ▁prayer +- ▁faces +- pad +- bare +- 歳の +- ▁debt +- ▁evidently +- abi +- ▁unseren +- ▁möglich +- まあ +- ▁suggest +- 付け +- pha +- ща +- 动物 +- ▁woods +- का +- 你不 +- ▁Х +- vien +- ▁pull +- ▁develop +- 当你 +- ▁cruel +- ▁そんな +- ità +- igung +- 借 +- したら +- ▁anders +- ▁ف +- بل +- மா +- 令 +- dus +- 因此 +- 市の +- になりました +- ▁indem +- ▁Regierung +- ▁الح +- ▁average +- 时代 +- شر +- ▁native +- 速 +- ked +- ▁ange +- ▁خود +- ▁X +- ▁아니 +- ▁tin +- ි +- tü +- 梦 +- hau +- ила +- тал +- 愿意 +- ▁worst +- por +- வி +- じゃないですか +- 続いて +- თ +- bwa +- wick +- ssa +- ▁lack +- 表示 +- ▁considerable +- ▁нужно +- olu +- hn +- 志 +- ▁trail +- ace +- rup +- ▁dogs +- 勒 +- ath +- ▁suggested +- ▁torn +- パン +- ▁nos +- ▁pal +- 程度 +- 也有 +- ify +- ▁bid +- mwe +- eix +- 慢 +- ▁raz +- ụ +- 都在 +- tent +- ▁пока +- 父亲 +- 贵 +- haft +- 条件 +- ск +- र् +- ▁Dan +- 注目 +- ▁arbeiten +- lichkeit +- 是谁 +- bas +- ▁gate +- ▁மற்றும் +- ▁male +- uel +- нь +- ▁dabei +- ▁sought +- ▁raise +- ters +- elles +- ▁какой +- それで +- ▁이제 +- ▁gain +- 的想法 +- ▁bodies +- book +- ▁wohl +- ▁それは +- ゆ +- hind +- ыр +- ▁constant +- 明日 +- ▁heb +- ▁好吧 +- 威 +- 心里 +- وت +- ▁sacrifice +- 然 +- ▁Männer +- 普通 +- ▁labour +- 产品 +- ете +- ▁bekannt +- 分析 +- 込み +- 看起来 +- とき +- 莫 +- ▁refused +- 占 +- gé +- 顿 +- comp +- しない +- ▁slightly +- ▁хорошо +- 道路 +- ▁fail +- しっかり +- ▁sheet +- 招 +- ▁source +- ▁Meinung +- ▁dot +- ▁Dank +- 经 +- sign +- 是你 +- ening +- ум +- ▁birth +- ▁també +- bury +- コン +- ▁hills +- 顔 +- ui +- ▁rapidly +- ▁pur +- ▁becoming +- 
我知道 +- 谢谢 +- 盘 +- ▁saved +- kken +- hang +- ▁поэтому +- ▁bringing +- 谢谢你 +- ▁Nicht +- mia +- ▁tender +- ▁showing +- дел +- える +- 以来 +- pal +- 也没有 +- ской +- ▁computer +- নে +- ギ +- ▁virtue +- 曼 +- ▁nehmen +- ره +- 硬 +- ▁unknown +- ▁tradition +- fon +- cr +- ws +- なる +- ▁finger +- ▁advanced +- inte +- мат +- 句 +- сэн +- ▁Film +- 到底 +- avi +- ▁papers +- ▁governor +- trag +- ▁upper +- 就可以 +- ▁じゃあ +- cé +- produ +- кам +- ▁Post +- ▁audience +- たち +- anga +- 切り +- با +- ▁extent +- ▁peut +- ▁pin +- なぜ +- halten +- ▁solche +- ▁gift +- ▁э +- endo +- 側 +- ▁neuen +- லை +- ▁pity +- ▁theory +- sal +- 提出 +- ▁Em +- ▁¿ +- ▁alte +- 場合 +- ▁division +- 历史 +- ▁everywhere +- iva +- '70' +- ▁Bir +- 들 +- なって +- pli +- 北京 +- ization +- 沉 +- ▁çok +- ▁praise +- ▁cities +- term +- yer +- ▁Des +- ▁Vo +- ▁portfolio +- ▁Tre +- これから +- று +- 会社 +- 价 +- ▁первый +- ▁dressed +- ▁Auch +- punkt +- ел +- 教授 +- ▁servants +- 建立 +- ▁charm +- ▁nächsten +- ▁cell +- ▁System +- ベ +- 靠 +- 育 +- зи +- стр +- cept +- хи +- ▁See +- ▁talent +- ▁Idee +- wir +- ▁où +- 你有 +- right +- ál +- 魔 +- ▁кор +- 射 +- ▁trip +- ▁established +- ül +- ▁sua +- ▁features +- ▁الس +- ▁distant +- 至少 +- ▁rome +- ▁Punkt +- ▁kitchen +- ced +- ets +- 都不 +- stellung +- ▁wake +- yl +- 具 +- ецца +- ▁advice +- ешь +- xa +- 国内 +- なんて +- ▁quarters +- 坏 +- ibi +- না +- 折 +- 一百 +- ▁gewesen +- ▁occurred +- esa +- эх +- ▁Hause +- kus +- 結果 +- 刀 +- ér +- ▁respond +- 満 +- ▁reflect +- phe +- ( +- ır +- ▁moments +- ▁Park +- isten +- ワ +- ේ +- ▁yr +- ▁beg +- ▁аб +- کن +- ▁mode +- ▁ذلك +- jou +- ▁forces +- 家里 +- ised +- ▁committed +- ▁hy +- ▁Г +- 是如何 +- ña +- zza +- 判断 +- mber +- その後 +- ▁announced +- ▁Tri +- ▁quatre +- ▁bought +- дар +- уд +- ности +- 空间 +- 立ち +- cro +- 核 +- 怪 +- 刚才 +- ▁думаю +- ständig +- main +- ▁robert +- ▁discover +- ара +- ▁pace +- ▁guidance +- раб +- ▁include +- 業 +- ▁kiss +- mbe +- 藤 +- ▁pulled +- ▁había +- ▁بعد +- cions +- 言葉 +- ▁Gesicht +- بار +- nell +- ▁darin +- сі +- 恶 +- lf +- ▁teeth +- ▁How +- ▁пе +- ej +- を受け +- 活動 +- 
▁lamp +- '24' +- ▁smart +- ▁leg +- ீ +- 礼 +- rac +- ▁lovely +- pose +- きます +- বা +- த்து +- 总统 +- 都没有 +- зе +- ает +- 所有这些 +- ▁card +- ▁jane +- 警 +- ▁facts +- dur +- ▁ankaŭ +- ▁我就 +- mir +- 身边 +- ▁sunday +- 投票 +- ▁prefer +- ră +- 黒 +- ema +- ▁detail +- ▁shame +- ▁oli +- 電 +- 마 +- ▁fut +- str +- 主义 +- ож +- ▁block +- บ +- ▁Cu +- 世纪 +- 番組 +- 航 +- ▁هذه +- ▁frei +- ▁этой +- ře +- ▁closely +- ▁intelligence +- ▁erst +- 卫 +- 意识到 +- ì +- ▁hang +- mma +- ▁hearts +- ▁leicht +- ▁drove +- app +- nga +- нос +- 支援 +- ske +- 厚 +- ▁bud +- ▁அவர் +- ▁One +- 进来 +- ▁afford +- back +- ▁zal +- 什么时候 +- ▁whispered +- 現場 +- ▁medical +- 旅 +- 真正的 +- pra +- être +- kor +- ▁pop +- 建议 +- tı +- 増 +- ico +- ▁Warum +- ▁போ +- ▁protection +- ▁details +- ▁Ol +- ▁milk +- 搞 +- 顶 +- eza +- いろんな +- ▁comfortable +- ことは +- ▁Ob +- 论 +- ▁section +- 属 +- oon +- ▁department +- ат +- ▁다 +- 博 +- ▁gathered +- 证明 +- ▁existing +- ން +- 게 +- von +- 尾 +- izing +- ▁affection +- かなり +- ech +- ▁metal +- ▁liebe +- 蒙 +- sko +- ここで +- 他是 +- ür +- pé +- 势 +- ▁dozen +- фи +- 影 +- тро +- 最好的 +- ▁secure +- lab +- ▁extraordinary +- ▁anyway +- ▁cosa +- ▁listened +- 货 +- sus +- ▁brauchen +- ▁slight +- nta +- ▁Namen +- ますね +- ▁conclusion +- ▁female +- нее +- سم +- 抗 +- пол +- tos +- ▁gene +- 观 +- ▁laughing +- ▁represent +- ▁manage +- 鬼 +- ▁syn +- ▁solo +- ▁crown +- ▁handsome +- ▁Pre +- town +- ▁Video +- 领 +- ▁ở +- ▁heeft +- 我们有 +- 延 +- ▁bitter +- ▁carrying +- 一会儿 +- ▁daha +- 我有 +- ▁„ +- 感情 +- ▁nurse +- ు +- 胡 +- ▁smooth +- ▁iran +- ▁él +- 運 +- 経済 +- ▁indians +- 是为了 +- sicht +- 位置 +- eḍ +- ▁donc +- ▁больш +- ▁Hal +- მ +- nek +- 象 +- ▁treatment +- 我已经 +- っていうのは +- medi +- ත් +- ▁অ +- டை +- なんだ +- ப்பு +- ▁consideration +- ▁த +- 制作 +- тэй +- tions +- 不到 +- ▁appears +- ong +- дал +- 的一些 +- fini +- ▁Wissen +- ▁当然 +- 今回の +- ▁developed +- 宫 +- 运 +- ▁careful +- 新闻 +- ▁obliged +- ế +- ▁projects +- もっと +- ▁treat +- kh +- ŝ +- ের +- ▁discussion +- ▁clar +- ▁sự +- べ +- ▁về +- zie +- 思考 +- ▁reward +- 环境 +- ヒ +- ▁П +- kö +- ▁дело +- 参 +- ボール +- 
гі +- 冬 +- ente +- ▁resources +- ▁வி +- ▁второй +- 日に +- 네 +- ència +- cks +- şi +- ා +- ▁independent +- ▁professional +- さらに +- ▁Is +- 地说 +- 各种 +- blick +- пе +- ▁staff +- ▁Raum +- iyo +- 铁 +- 是因为 +- zan +- 十分 +- ▁role +- 阳 +- 引き +- ▁тогда +- pl +- ティ +- ▁rough +- 举 +- ່ +- 冰 +- ▁nta +- ▁jest +- ▁focused +- ▁drug +- ▁mighty +- 記録 +- ▁ප +- द +- fli +- aha +- ctor +- bor +- ên +- bank +- 日の +- az +- ो +- ▁Q +- 了吧 +- ▁transport +- hält +- ▁lies +- ▁wow +- ▁schools +- wit +- ▁reg +- ▁smaller +- それは +- 最大 +- ▁Woche +- ▁кол +- rap +- 想到 +- ን +- ▁systems +- ▁mé +- ball +- あれ +- なかなか +- 幸福 +- cion +- のに +- ▁response +- tá +- ▁compared +- ▁Mon +- ▁imagination +- ▁pare +- 是吧 +- ค +- رف +- ▁200 +- nyi +- 做出 +- рав +- ▁louis +- ppen +- 曾 +- 会有 +- ▁realized +- каз +- ▁Wort +- ▁Chris +- ▁artist +- ção +- くる +- ▁element +- sho +- ▁sieht +- 归 +- 说了 +- gl +- este +- تا +- ý +- gate +- einander +- dí +- ciones +- 随着 +- ต +- 欢迎 +- 年代 +- uga +- 岛 +- 独 +- nom +- nken +- げ +- 职 +- ▁kurz +- ▁permit +- 尽管 +- ▁organization +- vier +- ère +- ▁extreme +- 危险 +- ژ +- fold +- ▁training +- 现 +- hat +- 寻找 +- ▁spr +- ings +- ▁University +- 行为 +- カー +- ▁Dollar +- ▁incident +- mbi +- gon +- 塞 +- ▁David +- ▁instrument +- ▁hal +- star +- ▁questo +- ▁rare +- でしょ +- 价值 +- ▁Schw +- 嫌 +- ▁cop +- ã +- უ +- ▁organis +- 把我 +- ▁Neu +- யா +- られて +- ▁cya +- ität +- ▁leader +- 歩 +- 仕事 +- ▁smiling +- ▁まずは +- imo +- ▁Kind +- бер +- ం +- ▁beide +- peri +- 受到 +- 暗 +- ▁contrary +- ▁saint +- ▁deine +- ▁treasure +- llo +- ▁hidden +- ierten +- iyor +- ▁patients +- 状態 +- ▁mess +- ▁unbe +- ▁skill +- nza +- ▁eso +- ▁joe +- ▁principle +- 示 +- ▁realize +- 领域 +- ▁EU +- ▁relief +- ▁ain +- 的歌 +- يت +- ▁Pen +- ちゃんと +- ▁arab +- فا +- مر +- ▁Che +- lim +- ▁succeeded +- pos +- ▁fleet +- wind +- өр +- 同样 +- ▁measures +- 豆 +- 公共 +- ▁actions +- ▁temper +- жы +- ▁不过 +- һ +- ▁gan +- ayo +- ってる +- ▁carrer +- ▁liberty +- রা +- ▁ئا +- ▁setting +- оў +- ट +- ▁protect +- 藏 +- 我是说 +- ▁colour +- ▁более +- 刺 +- ▁function +- blo +- ▁estate +- 
▁invest +- sid +- ▁families +- ▁Sha +- ыл +- ун +- ▁wore +- nat +- ▁steht +- 同意 +- 劳 +- ppe +- ▁falling +- 必 +- 私は +- 委 +- 简单 +- ▁suffered +- に対して +- тэ +- টা +- 随 +- ▁Sp +- ▁frequently +- hol +- ▁somehow +- 不可能 +- 十二 +- ▁partner +- itude +- ▁flesh +- ▁powers +- اع +- 你看 +- ▁olarak +- ▁ships +- '500' +- cus +- 』 +- ▁yi +- اه +- ▁ج +- 唯一 +- ист +- ▁stellen +- 有时 +- ▁campaign +- uje +- 『 +- ▁solution +- aron +- 上了 +- ubu +- ery +- shy +- ydd +- ▁nevertheless +- 投资 +- ▁overall +- 床 +- 対応 +- ▁affair +- 我不 +- ▁awful +- ▁Europa +- ▁như +- ▁stark +- ▁begann +- ▁Recht +- ▁picked +- ▁Ihrer +- kazi +- 基本上 +- 说的 +- bia +- ▁accepted +- ▁sp +- ▁request +- фе +- ▁Freund +- ency +- 功 +- vari +- shaka +- ▁favour +- ▁direkt +- ▁euch +- ක් +- ▁plenty +- ▁ĉi +- 有了 +- unt +- ём +- 等等 +- ▁businesses +- ▁seized +- もあります +- dien +- ów +- ▁grim +- 特に +- 不过 +- ▁glauben +- hal +- ▁kra +- üh +- ▁conference +- ▁zeigen +- ▁motor +- ▁пера +- 患者 +- юць +- ▁peu +- ▁thế +- 以为 +- 薬 +- sey +- 产生 +- ▁faithful +- ▁satisfied +- fic +- spect +- yor +- lly +- ▁dust +- ▁dreadful +- 即 +- ▁하 +- 私の +- ▁うん +- ▁jim +- ▁pet +- 套 +- 一直在 +- 에서 +- ▁funny +- viv +- 股 +- weg +- cs +- ▁expressed +- ▁مو +- 累 +- 上げ +- mara +- ▁foundation +- аны +- ont +- ▁tip +- gul +- وس +- 職 +- 把他 +- さんに +- uv +- にして +- ▁Sal +- ▁remarked +- ここに +- ▁appeal +- pel +- dul +- ं +- ▁meal +- ▁Internet +- ▁bob +- ▁fields +- 前の +- 生物 +- ▁possess +- ▁soldier +- эт +- つけ +- ▁horror +- ▁alla +- лет +- 変 +- んだよ +- ▁kal +- 你也 +- veni +- 还没有 +- cri +- 贝 +- می +- 当我们 +- лар +- ome +- 洋 +- дан +- ▁воз +- ▁ක +- gie +- 吸 +- 预 +- ▁excitement +- ▁거야 +- 是一种 +- ▁contact +- ▁rules +- 両 +- ▁mac +- ▁negative +- ▁Ist +- 敢 +- ▁Гэта +- ▁connected +- ▁universal +- ▁Gar +- ▁irgendwie +- pil +- ▁majority +- ▁destroy +- ▁Los +- 蒂 +- hod +- 我去 +- prob +- kol +- がありました +- ▁Den +- ோ +- ▁sé +- ▁relationship +- со +- ▁bore +- ▁lifted +- 编 +- tory +- ▁Körper +- ▁fu +- ▁whenever +- かもしれない +- ▁sprang +- வே +- と思う +- 了一 +- cat +- ▁我要 +- これを +- top +- 汽车 +- зі +- 害怕 +- '90' 
+- ▁iyi +- ▁With +- ▁şey +- ▁qué +- ▁emperor +- ▁lock +- oh +- ▁Và +- ▁flag +- ▁Reihe +- 结束 +- ▁tail +- ▁pardon +- ians +- ред +- ▁proof +- ▁fal +- ▁protest +- ▁parties +- 地域 +- 死了 +- rian +- 你必须 +- яр +- ▁rid +- ▁amazing +- kas +- మ +- nig +- 袋 +- Z +- ▁hello +- ட்டு +- ké +- кова +- ▁challenge +- 但我 +- сад +- ▁Pan +- act +- isto +- 我没有 +- زی +- ▁savage +- tisch +- ▁Angst +- ▁spo +- 丁 +- mise +- 弹 +- 我都 +- 可是 +- ▁prior +- だって +- ▁West +- ▁adam +- ▁nest +- 我还 +- ▁resist +- ▁Antwort +- ▁rev +- ▁수 +- ▁mot +- Y +- ▁இந்த +- ▁decide +- ▁wondering +- ▁phone +- ▁所以我 +- 境 +- ▁crossed +- down +- 疑 +- ▁radio +- 母亲 +- 印象 +- ▁Saint +- ▁те +- mobil +- ▁wisdom +- ▁để +- 战争 +- nna +- ▁anger +- ップ +- ▁flower +- ▁Familie +- kli +- ▁zei +- مل +- ▁Не +- rze +- ▁screen +- aniran +- ington +- ▁کو +- ▁frame +- 食べ +- ktor +- ange +- kü +- '""' +- 露 +- ▁đi +- ▁occupied +- ả +- ▁それでは +- nit +- 翻 +- ▁despair +- ▁washington +- five +- 人に +- 務 +- 和你 +- ▁kunnen +- ров +- ▁demanded +- 里的 +- ose +- 的名字 +- uti +- 天气 +- col +- シャ +- ▁Liebe +- бай +- ▁dawn +- 烟 +- colo +- 做什么 +- ▁schi +- 最初 +- ▁statement +- دار +- nam +- نى +- させて +- 商品 +- 까 +- band +- 杨 +- position +- tage +- 土地 +- ▁gerne +- ▁ghost +- 谢 +- ▁пол +- ▁fundamental +- ▁managed +- 池 +- 治疗 +- duc +- ▁Ihren +- 人员 +- ▁enemies +- eurs +- ▁School +- ▁kur +- ▁rank +- ель +- nah +- なんですが +- fern +- ▁yer +- 衣服 +- 凯 +- ▁communication +- ▁agreement +- ▁marked +- treten +- ெ +- tti +- ▁Son +- бра +- 共同 +- 赶 +- みたい +- ▁一方 +- ▁increasing +- य +- 니까 +- ▁Let +- ▁removed +- وب +- 浮 +- 発表 +- 有没有 +- をして +- ▁dying +- ▁slave +- 关键 +- ▁remarkable +- 进去 +- ▁Krieg +- 権 +- dzi +- tó +- iş +- ▁Het +- ▁теперь +- ▁supper +- ▁ari +- ஜ +- ▁cow +- न् +- 他们在 +- 年の +- ▁improvement +- ▁mistress +- 计 +- 舞台 +- 团队 +- ු +- 面前 +- ered +- ▁equally +- ▁суд +- ▁jak +- iem +- ▁violence +- بی +- ▁strategic +- ▁burning +- öl +- 没有人 +- 今の +- くらい +- ▁шу +- liegen +- ▁très +- ▁schien +- ▁However +- ▁mü +- などを +- ▁poet +- ▁الك +- ▁bishop +- ▁clo +- ▁deck +- お願いします +- ▁baron +- ▁Mor 
+- stig +- 有多 +- ▁farther +- 皇 +- 课 +- 恋 +- кая +- ень +- ▁primer +- 的声音 +- 人民 +- ▁말 +- ▁regarded +- ▁Spe +- 你好 +- 值 +- ▁groups +- ▁asleep +- 尤其是 +- ▁Э +- 組 +- пом +- ▁kom +- 戴 +- ▁effective +- ት +- stin +- sky +- ▁mile +- ▁verschiedene +- ▁Alle +- யில் +- 机构 +- 如果我 +- ında +- の方 +- 聞いて +- ▁educa +- jas +- ▁code +- ▁kas +- ▁Cap +- stellt +- ▁Ste +- chter +- テレビ +- ▁generation +- 坐在 +- 秘密 +- од +- もし +- ▁wedi +- роз +- พ +- vita +- ▁sprach +- ▁dengan +- ▁cab +- ▁describe +- ▁route +- ಿ +- ▁weißt +- ▁nahm +- ед +- いない +- ▁좀 +- grad +- ▁esto +- 원 +- ▁calls +- 务 +- ▁deeply +- că +- انی +- ▁continues +- னை +- オリンピック +- 了很多 +- ▁meat +- お前 +- 的大 +- bul +- thy +- 先ほど +- ngu +- ▁gew +- uba +- ▁pack +- сты +- wyd +- 丸 +- ▁arch +- ▁播放 +- 怀疑 +- 了你 +- weise +- ага +- آ +- ▁topic +- oz +- ган +- ▁herum +- 看见 +- 和我 +- ▁Amerika +- 移 +- ▁behold +- schau +- 这个人 +- ▁inner +- 营 +- ▁import +- angle +- rice +- ▁capable +- гла +- 女儿 +- ▁nervous +- ▁Kra +- 介绍 +- ▁flying +- 熊 +- ▁hacer +- ▁chain +- 唐 +- ▁minds +- がない +- whi +- 妹 +- 混 +- tische +- 姑娘 +- mah +- ▁acht +- ▁будзе +- 地球 +- ▁أو +- ▁innocent +- gui +- run +- ▁Men +- vě +- ▁software +- ffer +- 背景 +- ific +- らない +- bet +- களை +- تى +- 宁 +- ▁begun +- uzi +- ▁levels +- vă +- 年轻 +- ▁yield +- lap +- よりも +- ▁Informationen +- ▁بی +- ▁spirits +- ▁alarm +- ▁проезд +- ▁machte +- ▁explanation +- 对我 +- 不仅 +- ▁jam +- zone +- ▁younger +- র্ +- 优 +- لت +- гал +- toj +- ▁hide +- ▁buried +- マン +- 攻撃 +- ▁worthy +- 希 +- 吹 +- ▁Commission +- ▁konnten +- ा +- ▁gods +- tors +- lama +- ▁aspect +- ▁eene +- このあと +- dum +- 那样 +- ▁pred +- ison +- org +- 并且 +- ▁rear +- ரா +- 具体 +- ▁wave +- 加入 +- клад +- ▁complex +- 你现在 +- ▁Arm +- を見て +- loc +- ▁southern +- ▁mayor +- ງ +- gs +- тель +- ▁doors +- лә +- ▁citizens +- 麻 +- 眼睛 +- ▁forgive +- نت +- ▁evident +- 事儿 +- 人で +- 帮我 +- வை +- ▁commission +- ▁site +- 自身 +- ▁included +- igt +- 宮 +- 莉 +- 不太 +- 莱 +- ▁unique +- setzt +- ▁अ +- 第三 +- ▁sons +- 展 +- ▁dull +- pass +- ▁我说 +- ▁bottle +- 容疑者 +- ▁dari +- ▁الع +- wydd +- 
▁verschiedenen +- ▁bull +- 一只 +- ▁stehen +- 関 +- っていた +- ▁کا +- ▁чего +- pers +- ν +- ступ +- anda +- 高い +- তো +- worth +- 你还 +- ▁dim +- ▁tower +- ▁millions +- ▁satisfaction +- ▁effects +- ▁rates +- ▁gener +- 表明 +- ▁虽然 +- 灵 +- 전 +- kuwa +- 叫做 +- 下面 +- ▁cheer +- من +- کر +- 领导 +- 变化 +- عد +- 正常 +- 第一个 +- ▁clever +- ▁treated +- ▁divers +- water +- lös +- ő +- ▁rocks +- 路上 +- ▁necessity +- もちろん +- 的一部分 +- licht +- ingly +- ۇ +- 我们会 +- yard +- 了他 +- аг +- ▁ново +- oka +- ▁zero +- ▁fund +- ▁Natur +- 接種 +- ridge +- ▁pipe +- 老板 +- قد +- ▁figures +- 丽 +- vant +- 在哪里 +- maze +- だけで +- 替 +- lah +- ▁edward +- ▁shade +- 人間 +- 乡 +- 在他 +- nim +- ĉ +- 备 +- それが +- 新しい +- ▁friendship +- oi +- ホ +- 你这 +- nud +- ▁liber +- ▁sheep +- ▁вер +- ▁pictures +- 义 +- ▁election +- bung +- gira +- ▁Tur +- эм +- alo +- ▁attitude +- ▁них +- jes +- 橋 +- ▁conscious +- že +- ▁proportion +- ▁ruin +- cil +- ▁себе +- cyo +- 其他人 +- ▁document +- ▁western +- ▁oo +- ез +- ▁economy +- ▁twee +- cient +- ư +- ▁palm +- gua +- elli +- ств +- と思って +- raz +- ▁execution +- 視 +- 麦 +- ными +- ▁entrance +- 食物 +- кт +- ▁manifest +- ци +- ▁perspective +- ▁nations +- ▁ху +- lea +- anti +- ▁Bre +- ▁plate +- ▁desired +- ço +- ペ +- ▁alten +- ▁الب +- ▁expense +- 女子 +- そこ +- vou +- 目的 +- 坚持 +- ▁tiny +- voir +- ▁allein +- ▁confi +- шел +- 导 +- ります +- tica +- ▁gradually +- ▁Chi +- cial +- ▁pli +- 壁 +- ▁mem +- tab +- 会议 +- wis +- ış +- 私が +- ▁smith +- 一人 +- ▁obtained +- في +- ▁relation +- ▁grup +- ▁gaze +- رب +- rous +- していました +- か月 +- 彩 +- rk +- 许 +- hä +- ▁tour +- ▁giant +- ▁perform +- いや +- ▁lad +- ▁triumph +- ▁finish +- oku +- ニュース +- 艺术 +- ▁бер +- 席 +- ▁всех +- やすい +- hung +- ▁mí +- jí +- ▁sail +- ▁require +- ▁core +- ▁einzige +- 調査 +- рам +- 马上 +- ▁сказать +- 影響 +- ▁wounded +- griff +- 大哥 +- ▁Tür +- 完了 +- ▁Ziel +- 一场 +- 一旦 +- gent +- ▁untuk +- ▁criminal +- nny +- 企業 +- ▁revolution +- 还要 +- ▁mystery +- ▁mercy +- aban +- ▁constantly +- cli +- ▁teaching +- taj +- shobora +- rod +- cast +- roll +- ▁teacher +- итель +- ologie 
+- bly +- ▁update +- 僕 +- ▁acquaintance +- 休息 +- 今月 +- ▁article +- 挑战 +- ▁эта +- ▁bisschen +- 某 +- ▁あの +- fri +- 照顾 +- 酸 +- ▁nói +- وق +- ▁sul +- ▁visible +- ▁instantly +- gegangen +- ▁dei +- ▁それ +- ▁значит +- ▁versucht +- 対策 +- ▁работа +- өө +- ▁через +- ▁Ed +- 测试 +- 护 +- year +- というのが +- ▁counsel +- сла +- ҫ +- ですけど +- 接下来 +- ▁你看 +- 難しい +- ▁tres +- 实现 +- ▁триста +- last +- 按 +- 觉 +- 宗教 +- ▁Plan +- کی +- 这么多 +- 确定 +- 悪 +- ▁letzte +- ец +- ▁violent +- ▁knees +- ▁knight +- 在我们 +- 如果我们 +- isha +- aria +- ını +- ▁пу +- '21' +- gru +- ▁nel +- ▁crack +- ് +- 因为他 +- ttle +- ▁splendid +- ▁richard +- ▁Mont +- ▁brilliant +- ▁assured +- script +- 对你 +- rel +- ▁mill +- sca +- imu +- 減 +- ▁jede +- ▁какие +- 责任 +- apo +- ▁être +- ▁az +- 补 +- 就是说 +- ▁earnings +- 新たな +- ▁fragte +- ვ +- ▁necessarily +- tiv +- ▁wit +- ▁critical +- ▁harry +- 一张 +- gol +- 赶紧 +- 潜 +- ではない +- cou +- 一緒に +- ▁confident +- ▁lag +- 本来 +- ew +- In +- лася +- 言って +- 建筑 +- ާ +- 上がって +- ▁multi +- ically +- ▁turns +- 惊 +- ию +- ▁website +- よかった +- ▁worship +- ▁unable +- ▁throat +- ӱ +- ▁grief +- lement +- ここから +- مي +- ▁relations +- iɣ +- ▁كل +- ière +- 额 +- ▁published +- 纸 +- dina +- ▁vis +- umi +- ▁suspicion +- 陆 +- test +- ▁دارد +- ўся +- ▁sẽ +- ▁Det +- 也会 +- dad +- eten +- ▁petit +- در +- জ +- 我们要 +- ▁darum +- ▁maintain +- ▁director +- ▁الن +- غا +- 这个问题 +- 馬 +- 我很 +- aja +- ▁Paul +- 库 +- fy +- ▁official +- rí +- ▁delivered +- ▁cart +- ▁goed +- 上面 +- ▁schön +- ▁prisoner +- enga +- 让他们 +- ▁universe +- 挑 +- ▁rooms +- оп +- ▁bija +- аць +- 站在 +- pper +- ایی +- ▁оста +- ▁regret +- ▁differ +- ▁나는 +- даг +- ▁Col +- んですけれども +- ene +- ந்து +- 这儿 +- トップ +- ▁mat +- 言う +- ▁puis +- tail +- だろう +- ▁実は +- 来到 +- 起こ +- ▁tal +- ▁objection +- ▁Not +- üm +- ▁reden +- 我自己 +- 经历 +- ▁midst +- ▁admitted +- ▁እ +- 耳 +- ▁نمی +- にかけて +- ится +- ▁threat +- ás +- ▁stronger +- ingen +- 考えて +- ხ +- كم +- Т +- ▁county +- zem +- ▁halten +- 遇到 +- eff +- 聊 +- ▁worry +- 错误 +- fla +- 观察 +- fte +- ▁proposed +- 免 +- ▁hence +- ▁attend +- لة +- 
ume +- ▁cada +- ▁nearer +- ▁Mädchen +- 联 +- 大事 +- 甲 +- lassen +- өн +- ▁такое +- ▁roz +- の中 +- ▁guns +- ик +- ট +- cade +- 牌 +- 岡 +- ▁description +- ▁Ка +- ▁lots +- ないと +- ▁web +- 男孩 +- ▁잘 +- 最大的 +- اند +- 争 +- ▁adalah +- рад +- ▁concerning +- ▁singing +- ▁blame +- lè +- ▁affected +- ▁folks +- ▁verwendet +- kira +- ા +- fas +- ▁shock +- ▁さらに +- 容易 +- ick +- 日本人 +- ously +- 你自己 +- ▁possessed +- mittel +- ▁cabin +- бі +- ▁liberal +- ▁我认为 +- ▁stones +- 根本 +- 缺 +- ▁engine +- graf +- vat +- ▁vice +- ▁plants +- 变成 +- dung +- 录 +- lerin +- ▁Bro +- 唔 +- ▁mc +- dü +- ▁бул +- ▁changing +- ▁scientific +- ท +- ▁immense +- ▁val +- zug +- ▁sport +- ▁largest +- über +- lk +- ▁magic +- ▁гар +- meter +- 届 +- gger +- ე +- 笔 +- ▁私は +- ▁absence +- 京 +- ▁absolute +- ▁дела +- லி +- cons +- 媒体 +- ▁пи +- 这件事 +- 原来 +- ▁wy +- 丹 +- قل +- aries +- ▁Kan +- ▁conflict +- تم +- 对吗 +- 府 +- ▁над +- ischer +- 園 +- ▁brothers +- しく +- oth +- னி +- ▁proceeded +- ▁sem +- nak +- ▁intent +- ▁کند +- ▁disappeared +- 只要 +- ово +- bag +- chte +- ▁govern +- ugh +- きのう +- 秀 +- yar +- ▁mortal +- 停止 +- ▁voices +- 之间的 +- ▁nächste +- ディ +- ▁Au +- ▁rue +- گی +- ප +- zz +- ▁bleiben +- ▁لم +- ▁favorite +- pak +- zähl +- ▁hoped +- ▁Nein +- ▁mae +- なり +- 声音 +- ▁seated +- дет +- ▁bold +- meye +- ▁affect +- ▁tempo +- ે +- ▁soil +- 乔 +- 宇宙 +- ह +- 镇 +- 骗 +- یک +- ▁мар +- 暴 +- aan +- ▁waters +- 中で +- ั +- ucht +- мет +- ▁awake +- elo +- ▁expectations +- ▁sieben +- знач +- মা +- としては +- 撃 +- 工具 +- usa +- 记录 +- 일 +- ▁arrival +- atu +- 在这 +- ▁Que +- ▁hole +- ffe +- 港 +- ▁coffee +- 選挙 +- gleich +- ▁studies +- 幅 +- ▁gente +- 嘴 +- komp +- ▁wusste +- tric +- ▁library +- 增加 +- nzi +- ▁delicate +- ▁domestic +- mina +- ▁scheme +- ▁перед +- rö +- bild +- ▁profession +- ▁experienced +- ▁jedes +- ▁interests +- 后面 +- ▁Kom +- 经过 +- ▁elizabeth +- enda +- プロ +- ▁vessel +- hold +- ▁eating +- ▁quando +- czy +- abil +- merk +- ▁remind +- 盛 +- ▁channel +- ▁Nacht +- ▁না +- 有个 +- 也没 +- 意思 +- ▁link +- ▁تر +- ▁numerous +- zt +- のある +- 今後 +- bun +- 
نگ +- ▁recognized +- ▁scheint +- ▁الج +- 这就是 +- рын +- ▁custom +- ▁mundo +- ▁occur +- nau +- 的原因 +- 所以我 +- lect +- сці +- そうな +- ▁vez +- 行业 +- ▁bath +- ▁Gehirn +- ax +- 由于 +- 描述 +- ▁pol +- 陪 +- ▁silk +- ort +- ▁crew +- ▁Wi +- bt +- ▁frightened +- ▁всего +- 现实 +- dies +- 娜 +- ザ +- ▁seines +- たくさん +- ▁えっ +- ▁hor +- 我不知道 +- ▁Bereich +- ▁ceased +- 湖 +- 爹 +- ▁israel +- ▁wondered +- ▁objects +- 是我们 +- ▁prime +- 丝 +- ▁apartment +- ▁steel +- ▁ещё +- ただ +- 我相信 +- ▁resolved +- ▁Nor +- ▁Bur +- 固 +- Al +- 洞 +- ▁更に +- 中央 +- 舞 +- ▁china +- ▁cert +- ▁Mein +- 确保 +- mini +- 怀 +- ▁stepped +- 依 +- ▁shelter +- ▁fourteen +- ching +- ▁bou +- ▁failure +- ▁investments +- амі +- кон +- ▁christmas +- ▁signs +- 看到了 +- ▁khi +- ref +- ছে +- ▁groß +- மை +- ▁committee +- autre +- ▁anywhere +- ▁organ +- ▁게 +- ▁apply +- ບ +- いません +- ▁chosen +- ook +- тә +- ▁buck +- んだけど +- 음 +- ▁Wal +- ular +- erung +- 優勝 +- ▁mysterious +- ▁پر +- 決め +- ĝ +- ▁friendly +- 上がり +- 打ち +- rez +- ▁Abend +- ▁tous +- xe +- 万人 +- ▁Энэ +- ▁кар +- ぬ +- سل +- prav +- scher +- adi +- ▁ён +- lyn +- スター +- jun +- ை +- 一部 +- බ +- thing +- ▁principles +- ▁너 +- eka +- ອ +- ▁voyage +- ラン +- nut +- 最好 +- ▁rent +- 方が +- ▁Tod +- ▁Staaten +- ▁دە +- 奖 +- ▁majesty +- wij +- 易 +- ▁tiene +- ▁закон +- 麻烦 +- ▁Rat +- 过了 +- 達 +- ▁حال +- 传统 +- ۈ +- iyi +- 週間 +- つく +- radi +- ▁Haupt +- hil +- ▁bet +- 对不起 +- ▁prices +- ▁manchmal +- ▁manera +- 习惯 +- ▁construction +- gat +- X +- 的所有 +- ▁error +- 抱 +- ▁està +- なくて +- ▁heißt +- weit +- 我现在 +- ▁mission +- nica +- àn +- cor +- zna +- scha +- ときに +- hui +- ago +- ▁ee +- ▁herr +- 模式 +- 人気 +- 高兴 +- 准 +- ▁为什么 +- どこ +- стан +- 学院 +- 我看 +- 結 +- gne +- fahren +- ▁foolish +- 枚 +- ə +- ที่ +- ifi +- já +- homme +- 检查 +- 银行 +- 允许 +- дзі +- ▁pause +- ▁rief +- ො +- 醒 +- uto +- というか +- টি +- list +- 午前 +- ▁problema +- char +- 勢 +- ▁Licht +- nov +- 乗 +- ▁Pla +- ▁char +- ▁internet +- ▁coach +- кан +- 还没 +- enza +- ▁gently +- ▁beach +- fried +- 标准 +- ñ +- ▁currently +- 不行 +- ▁encore +- 圈 +- ለ +- iß +- fect +- 年轻人 +- 
▁College +- ọ +- ▁discovery +- ▁practical +- ▁ĝi +- 电视 +- poli +- ▁oben +- 命令 +- дзя +- 艾 +- ▁stands +- 虽然 +- ▁besteht +- ▁Möglichkeit +- ▁git +- ▁paused +- ▁sooner +- ksi +- ▁Mais +- unk +- ▁사 +- த்தில் +- iden +- cc +- ▁这就是 +- زن +- 名字 +- кра +- ▁trois +- 您的 +- door +- 刻 +- ▁mille +- ген +- スタート +- ▁cui +- ▁Will +- ▁sacred +- やり +- च +- зу +- system +- ▁mud +- inter +- ▁одна +- ▁quand +- dienst +- ▁hon +- แ +- 这位 +- ▁blessed +- ▁pushed +- ▁reco +- 瓦 +- ▁ariko +- 大多数 +- ▁наш +- ▁Wahl +- ▁reign +- 장 +- air +- kum +- 来看 +- دة +- ▁delay +- стро +- жу +- ▁mont +- ▁fought +- seite +- ог +- ▁reported +- chester +- 他就 +- gged +- ▁bare +- ▁sigh +- 薄 +- ▁thunder +- شی +- 年間 +- いつも +- アン +- ▁Yes +- ▁Präsident +- ▁northern +- ▁Sir +- ले +- 当地 +- dik +- sy +- tia +- liv +- عمل +- essa +- 軽 +- 议 +- ▁modest +- 蛋 +- ngi +- gesetzt +- oto +- ▁empire +- geführt +- 人たち +- nej +- ▁Ihrem +- ъ +- ▁waste +- ыг +- 被害 +- ▁İ +- bile +- ▁delle +- 弗 +- でき +- 当我 +- ▁quan +- ▁collection +- ▁கா +- ▁prima +- 杯 +- ▁photograph +- ▁veel +- 压力 +- tif +- 典 +- ▁wings +- мә +- ச் +- rij +- ▁dreams +- 司 +- ▁dur +- 予想 +- ▁gained +- 怎么了 +- при +- ▁내 +- ▁سر +- ▁horrible +- 人物 +- пар +- 我能 +- ▁fled +- 超过 +- 干什么 +- cí +- rais +- фа +- 調 +- vision +- ăm +- ars +- ▁easier +- ▁могу +- ▁Probleme +- 있 +- 顺 +- facebook +- ▁sofort +- たま +- not +- zog +- 戏 +- isme +- ▁آنها +- ే +- 显示 +- ▁funktioniert +- wali +- なんと +- 本身 +- 姿 +- 永 +- ▁stairs +- ▁sale +- ulo +- ép +- roj +- ▁Port +- ▁proceed +- 甘 +- ▁copy +- ▁burn +- ▁bearing +- 確認 +- ▁consequence +- ▁completed +- 不错 +- 在一个 +- ▁се +- fred +- 我喜欢 +- ▁employed +- 瑞 +- ▁Oh +- ▁banks +- ห +- 棒 +- ▁enormous +- 烧 +- ▁sympathy +- ▁Lage +- vet +- ▁Some +- ▁concern +- cré +- ங்கள் +- α +- faced +- richtet +- ▁factors +- اح +- 哭 +- ▁nada +- ▁Tor +- ▁philosophy +- ▁clouds +- レー +- ▁gesprochen +- ▁tight +- asta +- born +- 既 +- head +- indi +- ▁By +- ▁instinct +- ▁algo +- 幕 +- ゲーム +- 困难 +- rr +- 我这 +- ҡа +- ▁elements +- の中に +- lık +- ▁congress +- ▁你要 +- ▁album +- ▁London +- 
▁relative +- typ +- cted +- 专业 +- ▁connect +- ▁restaurant +- lier +- ▁climate +- ▁goal +- ▁Gespräch +- 则 +- ▁mood +- ▁classes +- ▁introduced +- ▁gì +- üt +- ο +- ▁schwierig +- ▁anche +- лег +- ▁maid +- 持续 +- ава +- ▁König +- 振 +- tique +- dda +- ▁odd +- cord +- tit +- 试 +- dit +- ▁segment +- ņ +- 転 +- ▁conscience +- ▁retreat +- ▁По +- ▁jar +- 難 +- ▁earn +- ▁Por +- sión +- 我真的 +- 盗 +- それから +- ▁التي +- kro +- عة +- '!?' +- ▁version +- 施設 +- ▁Fin +- 那就是 +- 徒 +- 对我来说 +- ▁cave +- ▁medicine +- ▁application +- ции +- 约翰 +- ▁Bild +- ▁informed +- zio +- rant +- ▁gör +- 说你 +- ▁谢谢 +- 大量 +- ajn +- 腿 +- جا +- 傷 +- аж +- ▁previously +- pho +- ▁immediate +- 痛苦 +- 敬 +- 他们会 +- ▁witness +- ▁interpret +- 避難 +- gebracht +- じゃあ +- ▁benefits +- ▁morgen +- ▁reference +- ▁feed +- ▁chu +- 终于 +- mmel +- 年に +- 하고 +- ▁එ +- ▁anxiety +- ▁severe +- ▁via +- اش +- 移动 +- れて +- пы +- ẓ +- ▁discuss +- 到达 +- のように +- ▁但是我 +- ▁returning +- オー +- нес +- 捕 +- ▁recognize +- ▁Vielleicht +- ▁nord +- 取材 +- 怎么办 +- 打电话 +- ▁falsch +- ช +- ▁Mark +- gomba +- ▁cheap +- ишь +- 突 +- ▁recon +- 動き +- 好吗 +- 炎 +- ▁kit +- ▁Of +- ▁ease +- ▁yards +- рь +- ▁engagement +- だと思います +- oro +- 大丈夫 +- ubi +- ▁games +- ▁Musik +- となりました +- 状 +- 季 +- ものを +- ▁slide +- aza +- segu +- führen +- 注 +- さんと +- 他会 +- 想法 +- ▁hurried +- termin +- تي +- ▁mostly +- sun +- める +- ▁wheel +- kem +- 反对 +- ▁intend +- いって +- 知识 +- ▁amongst +- ún +- ▁countenance +- fite +- ワクチン +- 全然 +- ▁variety +- ▁thomas +- 形式 +- ▁بە +- ▁intelligent +- force +- 值得 +- acht +- jem +- 用户 +- ▁الش +- нага +- quin +- rri +- hle +- ▁rush +- тов +- 说什么 +- ▁అ +- овать +- ▁Sin +- ▁کنم +- anto +- 吓 +- いっぱい +- ▁partly +- ▁hinaus +- ▁guilty +- isse +- ▁fox +- stead +- 確 +- ▁rope +- 出場 +- ▁вос +- 面对 +- ▁assistance +- ▁gesch +- ▁Fo +- いつ +- கொண்ட +- tech +- 据 +- ▁Miss +- ▁tools +- lau +- 逃 +- 大臣 +- тив +- лов +- ▁которая +- ▁Ber +- なんで +- 显然 +- ▁solemn +- 楽しみ +- 辛 +- ▁geworden +- ald +- ▁eager +- ▁counter +- 我们现在 +- mana +- ▁consciousness +- iendo +- ▁为了 +- iba +- ▁너무 +- 胸 +- ▁També 
+- ▁scho +- ▁vent +- xon +- ▁candle +- ▁Ш +- ▁lion +- ▁combat +- 关心 +- ije +- ▁located +- ▁شود +- staat +- ▁resolution +- 検査 +- ▁august +- ▁disse +- 交通 +- گو +- ▁depend +- ▁Von +- かい +- ▁Gen +- 不安 +- はこの +- ▁你是 +- chu +- ▁Programm +- aku +- luc +- ▁joke +- مه +- lands +- эг +- ▁adult +- ▁Да +- が出 +- 成员 +- 姐姐 +- 照片 +- ▁Ap +- ▁consent +- rer +- ▁tief +- ▁rub +- 察 +- ып +- 全球 +- 族 +- 落ち +- ▁ließ +- ▁High +- feld +- tul +- cl +- 事实 +- 하 +- 今日は +- 响 +- rés +- ▁Ger +- рт +- ও +- ▁Herz +- 你别 +- ▁سال +- 的朋友 +- ▁neces +- ▁pitch +- gno +- ▁tai +- kie +- ▁notion +- ▁yu +- eks +- kora +- ▁victory +- hur +- ▁Bas +- agi +- のでしょうか +- 映画 +- 评论 +- کە +- ▁Pol +- どんどん +- ▁enable +- ▁marketing +- ▁número +- 摩 +- ▁reduce +- ▁cela +- වි +- ్ +- ▁cru +- byo +- ril +- ▁push +- ▁ເ +- yen +- ▁bre +- ▁flash +- やる +- ▁خو +- すぐ +- ▁improved +- 持ち +- まる +- ät +- had +- ря +- 糖 +- かもしれません +- 负 +- دم +- anyi +- baza +- ▁überhaupt +- 鸡 +- れた +- ▁rushed +- 回来了 +- оль +- ▁Una +- ▁obtain +- 사 +- ▁refuse +- 语言 +- fangen +- ▁Eu +- 選手が +- kka +- 魚 +- ▁wider +- 骑 +- 善 +- ▁eternal +- قر +- ▁trick +- ▁así +- ongo +- ▁worn +- ▁stores +- рос +- 制造 +- lied +- rica +- raf +- 十年 +- omo +- 、2 +- ▁smell +- rav +- ▁pensa +- ▁continent +- ▁stupid +- となった +- 接触 +- ▁werd +- ▁Para +- 闹 +- ▁stir +- ▁score +- 구 +- とても +- 者の +- ▁こちら +- 属于 +- çi +- том +- 説明 +- 受け +- ▁gest +- サン +- ului +- ▁slip +- 없 +- 増え +- ▁apple +- のかな +- 偏 +- 承认 +- ▁surrounded +- ▁Zukunft +- ▁valuable +- ▁führen +- 答案 +- ▁hätten +- teilen +- ▁einigen +- 孙 +- ▁rw +- handel +- ▁approached +- cis +- ▁exchange +- 、1 +- ▁internal +- ▁мал +- まり +- ラー +- ▁gegenüber +- 埃 +- lain +- ▁absurd +- 全体 +- ция +- ▁phil +- 上海 +- ▁associated +- ▁điều +- ▁cam +- ▁William +- ▁dont +- ▁পা +- шла +- 技 +- ここまで +- ▁abantu +- kna +- 章 +- hem +- 例如 +- ▁Dass +- ▁softly +- qa +- ▁Pra +- 不再 +- 抽 +- ▁họ +- sehen +- ▁sunt +- rada +- を見せ +- ▁நா +- lou +- ▁kindly +- رة +- уб +- 巨大的 +- 作り +- bling +- ▁fet +- ▁moreover +- quel +- ▁uniform +- laufen +- ▁stable +- ▁байсан +- ▁dal +- 
▁clients +- wol +- だよ +- ismo +- コロナ +- 材料 +- ▁bitte +- ▁genius +- mā +- があって +- fat +- ▁escaped +- guard +- 男子 +- 在你 +- ▁beginnen +- ▁substance +- ▁bush +- 小心 +- 呆 +- ▁suggestion +- دان +- 跟他 +- ▁limited +- ーン +- ▁offen +- ▁unserem +- 伤害 +- cel +- ▁يمكن +- mate +- 独立 +- giye +- ▁mur +- ▁robot +- etti +- esc +- grund +- ▁Bra +- 负责 +- ▁situa +- 开心 +- ▁Don +- 羽 +- ▁dessen +- ▁plane +- ことで +- gü +- ▁куда +- ▁closer +- ▁precious +- 거든 +- овская +- است +- ▁convinced +- ▁bomb +- tà +- จ +- 種 +- ▁früher +- май +- ał +- ▁confess +- そこに +- ▁жа +- 因为我们 +- ▁phải +- вой +- кс +- ▁curiosity +- نو +- 図 +- 別 +- ▁десять +- tting +- йте +- ▁kiel +- ▁Meine +- できた +- ▁sap +- 大きく +- ▁addressed +- 康 +- ota +- சா +- نه +- kali +- тым +- ▁shouted +- ▁этим +- гә +- ▁сер +- gere +- 総 +- ▁우리 +- ▁concept +- here +- のために +- ▁unten +- ▁National +- sek +- ▁crea +- ▁hare +- ▁button +- なのか +- উ +- これまで +- 関東 +- quet +- 当他 +- 弟 +- ▁نو +- ▁நி +- 博士 +- stal +- 佐 +- amo +- ▁gal +- mé +- ár +- zing +- ▁zwanzig +- ▁nachdem +- بو +- していく +- ▁pressed +- ▁neza +- 有关 +- ▁dignity +- ▁meanwhile +- ▁Millionen +- ▁causes +- ▁poison +- を取り +- pir +- ▁arrive +- ▁mia +- ▁flood +- ory +- ▁thu +- 放弃 +- 早く +- ckt +- 的那个 +- anar +- 虚 +- ▁lights +- 雲 +- wart +- ▁같애 +- مو +- ▁doen +- ▁phrase +- そうですね +- 的一 +- ▁cum +- யை +- Co +- 込んで +- ▁tiu +- idos +- سب +- ▁今日は +- ▁musste +- 似 +- қ +- ▁flew +- ▁Mittel +- 给我们 +- ライ +- ▁bigger +- prov +- ▁Qui +- ▁hanging +- schrift +- してください +- ▁За +- 犯罪 +- ▁purchase +- ▁quarrel +- üs +- ▁seg +- ণ +- ▁federal +- ී +- 说我 +- ▁también +- ▁yari +- apa +- nja +- ▁succeed +- ▁hielt +- ▁recht +- Ma +- 正如 +- 焼き +- ▁besten +- fam +- 論 +- おります +- ุ +- くて +- tation +- 住宅 +- وه +- ▁mod +- ▁coal +- ▁italian +- ほ +- пан +- tine +- ▁però +- ▁render +- ▁South +- 欠 +- 广告 +- реш +- をする +- кий +- schuldig +- ▁programs +- рен +- ぼ +- hé +- ▁cancer +- ▁verk +- ük +- ▁miserable +- श +- azione +- ▁bunch +- ık +- ▁fierce +- fie +- ▁appropriate +- ▁healthy +- ▁subjects +- ▁kre +- qué +- ▁día +- ▁Ama +- ych +- 
▁fatal +- 馆 +- kiri +- ▁لە +- 菲 +- 每一个 +- ились +- форм +- hoo +- вор +- vingt +- ▁assume +- 消息 +- ▁swift +- ▁سو +- elt +- ▁competitive +- ▁hasta +- ▁aquí +- 小时 +- 既然 +- させ +- gene +- stoff +- ▁opera +- ▁М +- shing +- lies +- 連続 +- voy +- ▁mamma +- めて +- unda +- ▁hint +- ▁offering +- 吴 +- ▁luck +- ▁Medi +- iu +- 賞 +- lte +- 一份 +- 国际 +- ▁crisis +- ▁creatures +- raum +- لك +- ▁Roedd +- lut +- ▁tremendous +- ▁Esta +- swa +- রে +- ために +- ρ +- ▁micro +- 发布 +- kov +- ▁monde +- ▁shoot +- ▁rage +- ín +- ன்ற +- ident +- ▁reflection +- ▁stern +- 部屋 +- 太多 +- ▁forgot +- 殺 +- ▁arthur +- 広 +- ▁zwar +- lina +- を使って +- ▁rob +- эй +- ▁destroyed +- idi +- ello +- ▁activities +- cole +- ▁nein +- ▁dell +- pada +- 여 +- ▁Ser +- ▁tells +- ▁painful +- ones +- 子供 +- ▁他说 +- ▁politics +- 还在 +- 观众 +- ▁Seine +- ines +- 记住 +- ▁branches +- 僕は +- 更加 +- wala +- ▁Nord +- satz +- ▁appointed +- ▁że +- きて +- რა +- uring +- 決 +- äng +- ▁sens +- tischen +- 表达 +- pia +- ▁acquisition +- ▁تم +- 疾病 +- ▁directed +- ▁bwa +- 持って +- 某种 +- ▁chap +- یر +- хан +- ▁keen +- はない +- ▁lover +- ▁nou +- 有一 +- ▁Sicherheit +- ▁hum +- ▁observe +- 岩 +- ▁accompanied +- ▁laughter +- ▁ஒ +- jor +- دە +- ▁Hel +- rā +- ▁whilst +- 時代 +- ▁waves +- 意味 +- ▁survey +- ид +- 作業 +- 伝え +- ▁Sprache +- schrei +- ▁distinct +- 沈 +- един +- يم +- வர் +- dzie +- 赵 +- ▁spin +- amba +- 雅 +- ▁دست +- 很多人 +- 北海道 +- ありがとうございました +- schließen +- ▁porta +- லா +- vic +- ▁tren +- hum +- ▁Eltern +- பெ +- 澳大利亚 +- なんですね +- ▁많이 +- ▁branch +- 咋 +- 商业 +- ▁throne +- ▁burden +- ▁martin +- ▁alors +- fertig +- ▁rela +- 情绪 +- 抓住 +- 留下 +- 因为你 +- ▁totally +- 并没有 +- ▁sector +- ▁launch +- ▁leaders +- ▁shouldn +- 众 +- ▁cái +- 形成 +- 寺 +- ▁decline +- ▁Thema +- ▁fifth +- 从未 +- ▁thoroughly +- ▁Prozess +- La +- ệ +- ▁Bla +- 一块 +- box +- will +- なんですけど +- 终 +- ▁sugar +- ří +- роб +- ▁» +- én +- ▁votre +- 撮影 +- ▁warning +- ▁hundreds +- ▁Это +- ▁divided +- からは +- 重要的 +- 我希望 +- ▁string +- 合い +- دن +- ▁fand +- ▁Politik +- dı +- ▁stated +- 充 +- 映像 +- 目标 +- سي +- ▁household +- ▁suis 
+- ण +- ▁hut +- ι +- 几天 +- ▁dollar +- 鸟 +- 病院 +- 在这儿 +- ís +- rid +- riye +- 我对 +- プレー +- ▁punt +- 戸 +- ▁City +- ologi +- صد +- ▁tear +- رد +- emos +- ணி +- turn +- ▁pea +- 设 +- ье +- 齐 +- からね +- 表现 +- 早上 +- уул +- 盖 +- ▁degrees +- 四个 +- ▁hurry +- 爆 +- ▁প +- ▁plej +- ▁forever +- ė +- пад +- ders +- ▁دی +- ▁remote +- ▁mount +- ▁privilege +- uj +- bata +- のこと +- ぜひ +- cket +- ▁hari +- ▁critic +- ▁Fer +- ▁hopes +- மான +- ▁applied +- かけ +- تون +- ▁kwam +- ▁cyangwa +- ▁Macht +- ▁lift +- 流れ +- gram +- 平台 +- 写真 +- 拒绝 +- гор +- ು +- ▁Gemeinschaft +- ников +- ▁regarding +- ▁reduced +- 生产 +- 頂 +- ▁joseph +- ▁Boden +- ▁shining +- pela +- ວ +- dacht +- 植物 +- ▁heel +- 학 +- これが +- ったら +- 是有 +- ▁Während +- ▁weary +- 大概 +- 坂 +- ▁völlig +- ▁años +- chy +- ▁день +- ▁wound +- えて +- ▁combination +- oy +- autres +- инская +- ван +- asse +- кол +- ▁States +- ▁эту +- دو +- ▁meu +- ▁sollen +- 出て +- 人口 +- ል +- 我们需要 +- ▁humble +- به +- ▁bang +- most +- 自我 +- inda +- ▁rolled +- 报道 +- 睡觉 +- 暖 +- ے +- ▁crazy +- ▁seriously +- 较 +- 니 +- 我只是 +- 虫 +- ▁remaining +- rra +- ▁Ten +- unter +- pr +- 入り +- ngo +- ▁punishment +- ▁episode +- ▁zeide +- kop +- ▁sank +- いきます +- ▁file +- ▁Durch +- ▁Selbst +- ▁Projekt +- ▁ring +- ▁prisoners +- ▁pes +- ▁Fu +- 絶 +- ால் +- 言い +- ▁народ +- cca +- ▁dad +- ▁hero +- ▁vos +- ▁Mus +- ▁helen +- ▁생각 +- ▁Schritt +- ていました +- shire +- adas +- ▁pricing +- ▁Entwicklung +- 子ども +- 的情况 +- dim +- 码 +- gur +- gging +- vul +- ▁pays +- ліся +- 夢 +- ▁holiday +- hus +- ▁remarks +- ▁Rolle +- ған +- ほとんど +- ිය +- '23' +- 专 +- ▁strongly +- ▁solutions +- tang +- ▁اما +- ake +- tara +- 比赛 +- lad +- 律师 +- ▁uw +- ап +- 団 +- bur +- ▁types +- 欧 +- ▁assist +- 海外 +- ▁california +- ▁pig +- ▁trap +- ▁gall +- ös +- dica +- gled +- ▁architect +- ▁Ph +- ▁Blick +- 自宅 +- ▁onze +- ▁我也 +- ▁Geist +- 把你 +- ▁wicked +- ▁wolf +- 結婚 +- 记得 +- ▁ああ +- ▁breaking +- şe +- 尝试 +- ound +- ▁views +- 每个人 +- ское +- তি +- ▁pool +- 過 +- ▁trat +- ▁attended +- عا +- ▁profound +- 大部分 +- 应该是 +- ▁humanity +- în +- mad +- ここは +- 
調べ +- ▁loving +- 流行 +- ▁arose +- 高校 +- 智 +- あっ +- 不仅仅是 +- 国会 +- 最も +- てきた +- ▁sous +- ▁Но +- зд +- ▁movie +- camp +- lieb +- шу +- ▁kindness +- ς +- ▁我知道 +- gut +- ▁literature +- 斯特 +- tam +- ▁presentation +- ことし +- ▁designed +- わけです +- km +- ▁expenses +- oko +- 我们都 +- ▁George +- 優 +- 交流 +- 献 +- 失败 +- ひと +- ▁vy +- ▁border +- ▁pilot +- 集中 +- ▁tegen +- ም +- 基础 +- ▁inquired +- つか +- ▁assets +- ▁ibyo +- ▁slept +- ▁mismo +- ▁zien +- 登場 +- iḍ +- ▁interrupted +- ▁constitution +- ▁nodded +- archi +- ключ +- 还不 +- ▁هي +- ▁alas +- ▁merit +- 비 +- ▁начал +- ▁буду +- ▁Nur +- ▁thrust +- luci +- ▁الق +- ▁adventure +- ▁puede +- ▁debate +- пуст +- ▁paying +- 诗 +- mata +- four +- hari +- mě +- ▁enterprise +- については +- ▁Ei +- fit +- প্র +- সা +- ▁atmosphere +- ண்ட +- ▁இது +- 煮 +- ▁cottage +- アル +- ▁ஏ +- ▁cattle +- ில் +- arte +- ▁pole +- luk +- ▁secretary +- লি +- ▁leven +- 因为他们 +- stä +- 困 +- ▁Aquest +- 豪 +- 功能 +- ▁peer +- вол +- 两个人 +- ມ +- ▁earl +- 偷 +- ▁möchten +- ▁kuko +- ▁گ +- ost +- тен +- ▁прав +- ▁manager +- ▁пыта +- ▁Robert +- த்தை +- ject +- iten +- ғы +- 本当 +- 异 +- ▁faster +- ▁interior +- füg +- вя +- ▁Wert +- ▁cel +- frage +- مة +- baka +- 目の +- ▁Energie +- ▁leap +- muntu +- 的新 +- ▁margaret +- ▁weakness +- ▁steam +- ▁souls +- 정 +- ▁danke +- ▁Ма +- ▁eventually +- ché +- 我们就 +- 哥哥 +- ▁약간 +- 安排 +- lant +- 扔 +- ▁guests +- ско +- ▁click +- 頃 +- ▁Gruppe +- organiz +- ნ +- 水平 +- lop +- ▁tanto +- vý +- பி +- ▁imagin +- ▁Peter +- ▁rosa +- ▁Tat +- ▁bai +- 成了 +- uur +- gio +- ▁Ac +- haw +- が多い +- esse +- そうだ +- ▁کنند +- avoir +- ▁seus +- 国際 +- ា +- نىڭ +- 売 +- ▁spanish +- 宽 +- 聪明 +- 反应 +- ▁widow +- 不敢 +- ▁unit +- アメリカの +- ▁suc +- 質 +- ▁unhappy +- ▁flame +- 对吧 +- ▁tide +- ▁dijo +- ậ +- ▁shift +- 帕 +- 韓国 +- azi +- ▁déc +- 经验 +- 达到 +- ▁notes +- 랑 +- ▁fare +- 怒 +- ▁catholic +- 技術 +- 资源 +- ▁Nous +- bila +- ▁stared +- ▁tied +- quest +- 各位 +- ▁thirteen +- vano +- 对方 +- ağı +- ▁abge +- ssel +- 因为它 +- ▁veil +- ▁chat +- ▁또 +- establish +- 以外 +- ި +- raj +- மாக +- ▁auszu +- გ +- lance +- ▁anzu 
+- غير +- kara +- 答え +- 晓 +- மே +- ▁essential +- 当時 +- 번 +- nyuma +- ▁Stimme +- lage +- ै +- ört +- を持って +- ▁anderes +- 亿 +- ▁Sol +- игр +- 兴 +- リン +- த்தி +- wachsen +- ってこと +- ▁plu +- ▁partners +- ▁eggs +- ▁largely +- ▁hadn +- ▁enjoyed +- ው +- aid +- 상 +- ▁despite +- ▁Os +- 房间 +- ▁Union +- кр +- 五十 +- ▁distribution +- ▁pues +- 人々 +- 客户 +- ▁intellectual +- ▁cred +- ▁buildings +- 国王 +- ▁Show +- ios +- ▁baş +- ▁wholly +- ▁parent +- ▁neat +- лын +- ▁ehhe +- ▁braucht +- ▁stud +- 入れ +- ▁verloren +- ▁guest +- wyr +- ちゃう +- enti +- sak +- ران +- ▁kr +- аа +- 简 +- شا +- gegeben +- ▁mate +- 亲爱的 +- 增长 +- 意义 +- あり +- ▁đang +- ▁delighted +- 婚姻 +- trau +- ▁handle +- ▁مح +- ゲ +- 伟大的 +- مت +- ▁steady +- 健 +- ▁Cal +- ▁Bank +- ances +- ▁Tôi +- ▁physician +- his +- pren +- ambi +- ڭ +- laden +- ▁stayed +- などで +- ▁patience +- ▁utterly +- ▁오 +- ▁Dieser +- 牙 +- ▁primarily +- ▁Grunde +- ▁esper +- كون +- י +- ony +- ▁context +- kaj +- தா +- ▁sud +- là +- Le +- ▁іх +- ▁Mer +- ▁annual +- сил +- 中の +- ▁fro +- цу +- вали +- ▁contrast +- ▁Technologie +- 爷 +- ▁lernen +- 泰 +- burn +- 有一种 +- দি +- 禁 +- An +- 和他 +- uld +- ▁будут +- 幸 +- 史 +- coming +- ▁entwickelt +- ache +- 呼吸 +- hara +- ▁trump +- ▁hey +- post +- 子さん +- ▁beste +- asa +- ▁Zo +- füll +- 这么说 +- 挂 +- ▁arranged +- 悲 +- 一つ +- ics +- лек +- ▁coup +- 办法 +- மி +- があり +- 一边 +- ▁pink +- ▁расс +- 保证 +- ▁follows +- ▁scar +- ировать +- 御 +- ▁retail +- 另一 +- ダー +- эс +- 文件 +- ▁symbol +- جر +- 爸 +- 现场 +- 透 +- diri +- след +- teur +- σ +- というふうに +- 给了 +- ▁contained +- 放送 +- ddle +- ▁budget +- ▁"" +- өл +- ▁kamen +- ▁Met +- wag +- ▁کل +- ▁biggest +- ▁gross +- ▁feature +- Л +- fun +- 圆 +- ▁distinguished +- ▁studied +- kunda +- ▁dispose +- 人を +- ▁Hy +- 得到了 +- ▁Af +- Ar +- fus +- バス +- rai +- ▁obwohl +- innen +- ▁shoes +- eme +- そうなんです +- او +- kra +- ▁sensation +- mission +- ▁всегда +- raga +- ▁hoffe +- 心理 +- ▁ŝi +- 했 +- ▁properly +- ▁pala +- 那边 +- 大統領 +- ▁prophet +- ▁parliament +- нова +- tera +- fil +- というところ +- 就不 +- hou +- ▁consum +- ▁tomb +- ު 
+- uze +- ていく +- 羊 +- ▁lit +- 适合 +- ▁Gesellschaft +- としています +- ▁созда +- ▁desk +- ▁হ +- ▁አ +- rek +- ▁đến +- 带来 +- твор +- 题 +- год +- ▁russian +- に出 +- ▁Luft +- '22' +- dank +- 再说 +- 斗 +- వ +- ▁jedem +- ▁பா +- ▁knife +- 反正 +- 開発 +- ▁crying +- ▁moet +- sser +- ▁operation +- ▁confusion +- ರ +- に入って +- master +- 缩 +- ▁этих +- ▁continuing +- ▁rival +- ły +- ▁articles +- ▁кажется +- ▁remark +- ▁damage +- ご覧 +- nto +- ▁Google +- ▁strip +- 銀 +- お店 +- tse +- ▁iets +- ▁devoted +- 絵 +- ▁jump +- 园 +- 女士 +- 世界上 +- ▁log +- 丈夫 +- ▁traffic +- ▁所以我们 +- ünde +- ▁deu +- ▁abandon +- ▁tale +- 审 +- şa +- ▁bah +- ▁тот +- bble +- 寄 +- ▁tis +- ▁musical +- 开发 +- ▁deposit +- 阅读 +- two +- のような +- كن +- 项 +- 决 +- ▁num +- tiva +- 中に +- ▁چه +- ıyor +- ▁persönlich +- kta +- ён +- ▁Mc +- that +- ander +- ▁perceived +- 東京都 +- ▁Ter +- ▁hunting +- ▁companions +- ▁emotion +- cious +- 부 +- 升 +- さまざまな +- 记者 +- 绝 +- ್ +- ▁ends +- ▁الإ +- 细 +- ias +- thro +- ▁bor +- fei +- ▁Inter +- 的感觉 +- 任务 +- ▁surrounding +- ▁хочу +- ▁tar +- ▁mixed +- ▁Schüler +- ▁স +- さま +- ▁spending +- ▁Parlament +- see +- пор +- 一段 +- 我和 +- guru +- ▁beau +- еш +- аш +- ▁என்று +- zor +- ▁sisters +- вать +- ის +- bei +- ites +- ▁yours +- лось +- ▁друг +- ▁Just +- ▁expedition +- ▁utter +- ▁செ +- ▁transform +- ▁hollow +- lev +- ▁lonely +- ▁sonra +- క +- ▁zijne +- үү +- ▁erkennen +- ▁té +- ▁هر +- ▁komen +- ktion +- сло +- 利益 +- 真的是 +- ड +- tzen +- ▁mare +- late +- ▁shake +- ▁servi +- ▁homes +- しながら +- भ +- 法院 +- ▁multiple +- ▁Fri +- あまり +- ▁exception +- ▁reader +- 潮 +- ▁discussed +- 什么样的 +- ▁trop +- ▁две +- nä +- irwa +- ▁uko +- 塁 +- hing +- sack +- いただきます +- 肯 +- 盛り +- ological +- ▁Facebook +- ▁poco +- cles +- schauen +- 出し +- تن +- 她说 +- чит +- ▁spi +- zelf +- 沢 +- ▁helping +- খ +- ▁Dat +- 价格 +- 看到的 +- ▁деле +- ▁kap +- ▁accordingly +- ӹ +- ▁operate +- ▁wer +- 委员会 +- ▁meng +- ▁literally +- tari +- 左右 +- лом +- ▁bus +- ▁permanent +- 效 +- 四十 +- хий +- pol +- vous +- rè +- こうした +- ▁arise +- こっち +- ▁lại +- ্যা +- teri +- ▁driver +- 十五 +- 
▁American +- соб +- ù +- ▁heap +- ▁barn +- nger +- ▁Cas +- すれば +- ▁rifle +- ▁Produkt +- ▁After +- ▁まず +- 中间 +- ▁stem +- ▁background +- stehen +- ▁Tage +- bij +- ▁schließlich +- ▁কি +- rte +- кай +- ▁writer +- 額 +- ▁duties +- ▁hungry +- ▁wegen +- ▁piano +- ▁pretend +- 運転 +- ▁sleeping +- mpa +- 大脑 +- iw +- 部门 +- ▁doctrine +- ▁define +- dian +- 美元 +- mmen +- 提醒 +- ▁efficient +- ▁続いては +- ו +- eo +- ▁plötzlich +- ສ +- ▁несколько +- きょうは +- denken +- дә +- ▁President +- ▁occasionally +- 不喜欢 +- ▁James +- ▁perd +- ▁eigen +- 今日の +- ▁nombre +- under +- 津 +- ▁estaba +- ▁бел +- ▁rod +- ▁mala +- ▁sovereign +- 今回は +- anno +- ▁понял +- مە +- ▁tiempo +- 平均 +- ▁initiatives +- ▁Tro +- ▁тре +- got +- ▁industrial +- uro +- ▁Weil +- ▁beast +- ▁spell +- ことに +- ▁conviction +- ▁sorts +- ▁sistema +- ▁visited +- lines +- ▁olma +- enz +- 寝 +- 伴 +- ▁《 +- 状态 +- ▁tele +- ▁quit +- ▁poder +- registr +- ▁你怎么 +- 将军 +- 战斗 +- leben +- 途 +- ্য +- 触 +- ▁absolut +- 朱 +- bis +- 懂 +- ▁cells +- 里边 +- stadt +- ▁kings +- はず +- ▁divi +- ▁bless +- ▁einge +- 不需要 +- ် +- ▁queer +- bir +- ▁свет +- ич +- alter +- çe +- 先月 +- ▁lawyer +- ▁кур +- now +- еж +- ▁formal +- ▁город +- ▁extended +- ▁poverty +- 許 +- буд +- 湿 +- bringen +- 贴 +- ▁từ +- ▁verlassen +- ▁Folge +- ▁lest +- 那天 +- ▁composed +- ▁innovation +- quen +- ▁вз +- ▁striking +- 锁 +- ▁fellows +- サー +- ▁Team +- ▁lieutenant +- ряд +- ▁loro +- ▁gefunden +- ▁Freunde +- ▁Minuten +- 地震 +- ▁これが +- ▁generous +- ▁толькі +- rib +- 経験 +- ヘ +- hap +- 家的 +- pid +- iente +- 丢 +- chel +- mod +- ▁sagten +- enge +- ▁exciting +- 되 +- 月に +- ▁spielen +- ▁destruction +- ▁すごい +- 接近 +- ович +- ▁stiff +- constru +- pur +- 想想 +- ▁Wochen +- 岸 +- tsi +- ▁premier +- 见过 +- ▁Non +- ▁possibility +- ▁responsible +- ší +- ▁expand +- স্ +- ị +- Be +- ▁strain +- ڵ +- ▁интерес +- ister +- dau +- ▁singular +- ▁location +- ▁prospect +- 変化 +- ▁suo +- ▁genannt +- ▁stress +- лд +- ubwo +- 検 +- るの +- бли +- жээ +- ▁Sein +- ▁cả +- 相手 +- ▁вторая +- ▁Morgen +- comb +- ▁troba +- вин +- igo +- 那么多 +- 
▁должны +- いで +- ās +- ▁Dis +- ▁那我 +- 昨天 +- geld +- ▁acts +- ▁Trump +- ▁boats +- ジャ +- ▁eigene +- 隔 +- 输 +- іць +- лин +- ▁Schule +- ▁যা +- пре +- ▁association +- ▁trembling +- 猜 +- вая +- ரை +- loop +- ▁recall +- どうぞ +- buch +- '300' +- ▁gibi +- ▁stretched +- ட்டி +- ▁россии +- ▁нэг +- 5% +- ▁performed +- グループ +- ıl +- ▁proposal +- ▁semi +- 捜査 +- ▁sehe +- 隆 +- ▁accustomed +- したのは +- ▁schlecht +- 长大 +- ▁unusual +- ress +- 很快 +- ▁frequent +- activ +- gezogen +- ▁germany +- ▁ça +- bound +- ▁باش +- ▁Fla +- ifica +- ▁anni +- یو +- ▁brings +- ▁вельмі +- лог +- ▁individuals +- maz +- » +- 걸 +- ▁Ordnung +- лё +- 大阪 +- ▁یا +- ேன் +- erte +- 不断 +- தான் +- ▁ale +- кла +- ▁glanced +- jya +- pis +- 是不 +- aro +- ▁Yo +- 我们必须 +- sprech +- چی +- ▁nav +- graph +- ▁großer +- uten +- 温度 +- ▁Cre +- ▁modo +- ▁mewn +- ▁wearing +- figur +- uye +- ▁North +- chung +- ▁kay +- ▁european +- るのは +- ▁billion +- служ +- 細 +- ▁hunt +- ▁responsibility +- ▁ও +- ▁говорит +- ▁grateful +- 太阳 +- ▁fragen +- 抢 +- ▁অঁ +- ▁dice +- ▁riding +- ▁tard +- dition +- ファン +- ▁있어 +- ▁sports +- π +- ▁india +- ья +- বি +- cum +- 妻 +- 无论 +- 意外 +- produkt +- やっぱ +- 而是 +- ありません +- 症状 +- ▁Op +- bio +- ▁victim +- ▁wanna +- owa +- ▁Ak +- ▁самом +- ▁Glück +- rse +- سه +- 我们已经 +- rede +- sept +- 霍 +- ▁Gre +- dwa +- nant +- 仲 +- rack +- ▁thì +- 我将 +- ▁butter +- ▁hace +- ▁Š +- 特朗普 +- át +- bera +- ▁делать +- 力量 +- ▁yok +- ▁داد +- het +- gis +- ▁Leuten +- 다고 +- för +- ▁agent +- ป +- 監督 +- تح +- 誰 +- anc +- ▁mehrere +- 作为一个 +- ▁değil +- 瓶 +- ▁지금 +- 下午 +- ▁katika +- 同学 +- ▁سا +- ▁invited +- ▁spare +- ▁represented +- eld +- 印 +- 撞 +- ▁swept +- ▁wishes +- 之外 +- などが +- ▁maiden +- ▁الف +- ▁prize +- ase +- phone +- 問 +- خر +- ▁concluded +- ▁repair +- gos +- ню +- liz +- ее +- 有可能 +- ▁ак +- ▁neben +- 造成 +- ▁nennen +- ▁Cy +- ▁oak +- овский +- rif +- lern +- ezi +- ▁Gemeinde +- ▁guten +- ▁hid +- ▁poetry +- ▁mano +- ▁такая +- ▁plot +- ▁selling +- ▁gekommen +- ▁Maria +- ▁Min +- 导致 +- ີ +- gelegt +- 操 +- fod +- 就算 +- لب +- 家人 +- 墙 +- ところで 
+- 脑 +- ▁Komm +- 版 +- なの +- ▁yap +- ▁proposition +- eze +- ▁latin +- 喂 +- gebaut +- cinc +- politik +- ▁そこで +- aquesta +- ▁але +- gala +- ▁actor +- ggi +- тя +- არ +- unu +- 荷 +- ĩ +- ээр +- ▁sil +- ▁поселок +- ▁Pat +- 了一些 +- uren +- found +- 摇 +- ент +- ▁signed +- くなる +- ▁display +- 不一样 +- やつ +- 内部 +- ▁tema +- 演讲 +- ▁select +- ▁ashamed +- 決勝 +- 户 +- ▁elder +- 寒 +- jin +- 发生了 +- wyn +- स् +- 孩子们 +- ちゃ +- ▁rude +- ▁именно +- әй +- 生气 +- 販売 +- ▁yw +- ▁movements +- 好き +- ▁feared +- 实际 +- ▁dun +- ▁creation +- ▁pied +- 腕 +- point +- жил +- யி +- Bri +- ▁cad +- genda +- ▁kissed +- 标 +- ▁chest +- ▁cheeks +- 避 +- 冒 +- 宗 +- kwi +- ▁Tal +- ▁cycle +- ▁geschrieben +- られない +- lian +- 映 +- lebt +- 帰 +- ▁adopt +- たり +- kola +- ▁payment +- ான் +- ▁hol +- త +- ▁bible +- ▁achieve +- 意味着 +- hü +- пят +- ▁rail +- おい +- ▁действительно +- ▁Platz +- اط +- 卵 +- ▁materials +- رت +- ッと +- ware +- sses +- ▁disposition +- ▁glücklich +- tial +- fund +- ▁continu +- stimmt +- 話を +- kur +- ▁attached +- ▁mild +- ▁locked +- 染 +- フェ +- ▁recover +- ▁Qu +- 顾 +- 办公室 +- ▁sou +- kata +- hak +- 立て +- 桥 +- ள் +- лт +- バイ +- グラ +- ▁shout +- 一切都 +- 忍 +- ▁comments +- ▁United +- аться +- үүл +- ▁transition +- ▁manufacture +- 所以你 +- aine +- 習 +- Ge +- accord +- ▁przy +- ▁leurs +- ▁jsem +- ▁verb +- 也可以 +- 就要 +- 你去 +- ▁brush +- lov +- шта +- ▁jobs +- 份 +- ▁appreciate +- ガー +- 围 +- ▁maj +- emo +- ▁supported +- ▁abroad +- ▁unu +- ▁thấy +- ▁sc +- ้า +- ▁못 +- pes +- ▁Mag +- ின் +- ▁salut +- tatu +- ▁juga +- ් +- ох +- つまり +- 几年 +- ▁நான் +- ▁font +- ▁Kirche +- ▁knee +- работ +- ▁Zwei +- ▁distress +- ▁Bau +- 猪 +- wur +- hof +- ▁blessing +- ழ +- mera +- ▁heavily +- ▁الص +- cover +- 逆 +- 磨 +- 려 +- 签 +- 強く +- гра +- 含 +- ▁behavior +- وان +- ▁pine +- bewusst +- ▁neun +- ▁bones +- ▁それが +- ▁armed +- ▁quant +- нут +- 同志 +- るか +- ▁refuge +- 说过 +- ирован +- 播 +- 见到 +- いった +- ▁Bill +- ▁reserve +- ▁farmer +- roy +- 没有什么 +- ▁intimate +- شن +- சு +- ▁essentially +- ▁aquest +- ▁corre +- ▁sala +- ▁ändern +- 郎 +- んじゃないか +- ▁Tages +- 
▁ここで +- ▁сама +- 厂 +- ▁хар +- 错了 +- ளி +- ▁june +- ▁competition +- ▁fois +- ilen +- リア +- 我不会 +- ▁ruth +- ▁opposed +- ▁penn +- ▁هستند +- 拖 +- ▁antwortete +- ▁jeune +- إ +- ▁repeat +- ▁greek +- 就说 +- ș +- '!」' +- ▁teachers +- ▁sy +- あなた +- 看来 +- isation +- ▁rya +- 一段时间 +- ▁belong +- obu +- ാ +- ▁traditional +- ▁painted +- ことも +- чь +- ▁Mrs +- сов +- ▁settlement +- いろいろ +- ▁کنید +- load +- ▁massive +- ▁zag +- ▁alternative +- dá +- aren +- メートル +- ▁retired +- ▁consult +- ▁feels +- njye +- 训练 +- ▁charming +- hop +- ▁尽管 +- wl +- ▁sole +- gad +- ▁karo +- nez +- tec +- 我告诉你 +- 国の +- jon +- ▁bio +- крас +- tali +- ▁việc +- ▁reasonable +- 制度 +- ▁kate +- bw +- юсь +- نش +- ▁schaffen +- 搭 +- ▁slipped +- 気温 +- tī +- ゴール +- ّ +- ▁adopted +- 更に +- para +- となっています +- ind +- ▁etc +- んでしょうか +- ▁scan +- ▁ambition +- iques +- ▁basket +- rau +- ▁bark +- 鞋 +- させる +- dora +- igu +- éc +- ían +- ▁supreme +- يف +- krat +- mund +- 卷 +- avo +- 暴力 +- ▁seldom +- hack +- urt +- ▁approaching +- 幼 +- др +- ▁murmured +- kni +- ▁goodness +- ▁Так +- zeug +- 合わせ +- Po +- 腰 +- ▁Val +- ▁consistent +- ▁rever +- ▁gap +- ▁gehe +- ▁dame +- witt +- ▁handelt +- ści +- dreh +- ▁pages +- يو +- 另外 +- ě +- nach +- ▁unfortunate +- headed +- 恨 +- path +- ましたね +- 主人 +- ▁совет +- rī +- ▁aren +- ять +- ▁depends +- flo +- miss +- ▁таки +- ▁zeigt +- ▁terre +- ▁commanded +- ▁noi +- bell +- ▁Tatsache +- cycl +- رك +- mani +- ் +- ▁muito +- duk +- lir +- ▁tidak +- ▁regiment +- izo +- ほか +- ▁clerk +- ▁vào +- ▁года +- 雄 +- ▁bullet +- tree +- ▁sempre +- hlen +- дэг +- erin +- rimo +- 银 +- ▁teams +- ▁foi +- ums +- ▁shortly +- ▁cure +- atge +- 恢复 +- ▁Hol +- づ +- ▁Test +- ▁drinking +- ▁released +- tics +- ▁eastern +- St +- ▁zaman +- 作用 +- ▁versus +- ense +- সে +- aught +- ▁жив +- 라고 +- 押 +- ▁reflected +- イギリス +- ▁Hund +- கை +- وع +- 交易 +- ▁blank +- ▁Situation +- ▁жизни +- ▁minor +- 大変 +- 으로 +- ▁obvious +- 环 +- 攻 +- ▁useless +- ▁blieb +- のほう +- ▁slaves +- 材 +- ▁également +- fal +- сын +- ▁tras +- овой +- ▁wherever +- рез +- 
▁contest +- care +- ▁institution +- дав +- гло +- 的那 +- ▁contribution +- 这边 +- нне +- ▁administration +- 互联网 +- 了啊 +- ▁donde +- она +- ▁Demà +- ление +- ▁也许 +- ▁jour +- ▁বা +- ес +- 打算 +- ▁быў +- awa +- mere +- रा +- 我不是 +- platz +- 私も +- pè +- geven +- ▁ensure +- ▁importante +- 彼 +- ▁داشت +- 表情 +- тру +- ところが +- ▁vital +- bab +- 給 +- nio +- ▁nutzen +- ją +- 泡 +- 不得不 +- rend +- یان +- 柱 +- ▁glorious +- ensi +- 这事 +- ாக +- abu +- 有一天 +- ▁feast +- 동 +- ▁olan +- 把这个 +- ▁inhabitants +- キロ +- ließ +- 资金 +- 庄 +- ▁Tas +- area +- 気が +- gues +- يق +- шин +- 積 +- bug +- 練習 +- ▁stroke +- ▁Sy +- ▁või +- 狂 +- 迟 +- ▁Tot +- arse +- ▁plum +- 不应该 +- ▁handed +- ▁earnest +- 勤 +- 对此 +- 我来 +- 敌人 +- 佳 +- ▁acquired +- lak +- жал +- ▁fever +- ▁impulse +- 任何人 +- 南部 +- ▁dove +- ▁ihres +- 저 +- nega +- 前面 +- готов +- ▁newspaper +- ックス +- rait +- мар +- 碎 +- 老公 +- 所做的 +- ▁otro +- ▁difficulties +- 開催 +- ▁owner +- 批 +- bü +- aver +- дать +- ▁trace +- ▁gleichen +- தை +- weisen +- aɣ +- ▁coin +- شه +- 드 +- 例えば +- agon +- ▁print +- ▁treffen +- اً +- 乗り +- 法国 +- ذا +- ▁hombre +- 隐 +- 不足 +- ▁Spa +- kı +- ▁departure +- 的方法 +- 老婆 +- ▁хотел +- ▁creating +- nten +- 台風 +- 人も +- ▁وجود +- ▁Sk +- ates +- ▁tough +- ▁ўсё +- ▁unlike +- flu +- ▁zweite +- 冠 +- oba +- 以上の +- 一件 +- sell +- ova +- ▁accomplished +- ально +- てください +- 峰 +- pati +- 住在 +- ▁ignorant +- 事实上 +- ▁deeper +- ▁travers +- ▁venture +- 予定 +- らず +- 病人 +- يك +- হা +- ▁Wirtschaft +- 是由 +- ワン +- cola +- صل +- 妻子 +- ▁我在 +- ▁reputation +- ▁entfernt +- trieb +- ▁fiscal +- ▁combined +- การ +- ▁plainly +- ▁closing +- ▁og +- غر +- ▁eher +- 押し +- ▁فقط +- тры +- 多的 +- 要去 +- イメージ +- 欧洲 +- och +- ṣ +- ▁reckon +- ▁granted +- 際 +- ể +- ▁italy +- 熟 +- ļ +- 淡 +- ▁whisper +- лай +- ▁painting +- ▁poi +- ▁smoking +- ▁понимаю +- ёр +- krank +- ▁insist +- うまく +- ▁fairy +- 但他 +- アップ +- мест +- цен +- quis +- まさに +- tör +- ▁mono +- 試 +- 続 +- ▁orange +- キャ +- 你不能 +- お金 +- ▁Sohn +- гд +- 哪儿 +- 网站 +- 你怎么 +- amento +- icia +- 深刻 +- тр +- ▁mist +- تو +- ▁weder +- 以降 +- 乳 +- 
▁attractive +- 漂亮 +- 广 +- ▁sue +- ▁咱们 +- にとって +- 追い +- ден +- ▁Cla +- ▁такие +- 始まり +- ках +- حق +- ▁Ali +- 内心 +- ▁jemals +- ▁suspect +- 次の +- ▁wollten +- ▁hunter +- ▁slope +- lee +- agenda +- ▁spre +- ká +- 供 +- ▁pond +- 你了 +- になり +- 还能 +- ▁risks +- ものが +- ▁Av +- юцца +- ▁Because +- ▁мой +- usi +- 叔 +- ▁seeking +- ▁momento +- ▁margins +- 近く +- ▁General +- ▁epi +- hö +- ▁compelled +- так +- тон +- mä +- ▁وأ +- 艺 +- ▁pound +- ์ +- ▁admiration +- ▁songs +- 行動 +- ▁dein +- 館 +- ▁ښه +- ▁abuse +- ▁Stück +- ▁pope +- ois +- ▁Umu +- كي +- ▁igihe +- になっています +- ấ +- ッド +- manda +- hush +- ▁interessant +- ▁hörte +- ▁bya +- ▁jean +- 的手 +- ▁infinite +- ▁entonces +- ают +- 感兴趣 +- ける +- 一体 +- foot +- дол +- ому +- эв +- chas +- ând +- rück +- zia +- ▁이런 +- 难道 +- ▁adapt +- ▁Kor +- 心配 +- ▁regions +- ▁eagerly +- ▁Mensch +- 一项 +- sab +- 殿 +- ▁Kol +- ▁communities +- ▁kaum +- odor +- わり +- ▁Kla +- 而言 +- ń +- ▁yard +- dine +- ино +- ▁lunch +- 武器 +- ▁我现在 +- ▁forehead +- ▁Bon +- ▁fame +- Ж +- ▁那是 +- ▁withdraw +- ▁mask +- ▁تع +- ▁dag +- liness +- 佩 +- 三年 +- ▁всем +- ▁aho +- レン +- 哪里 +- полит +- 냐 +- ▁idle +- ▁dealing +- bern +- 的信息 +- ▁روی +- ご紹介 +- ками +- geze +- eti +- has +- ▁Mary +- ▁planning +- сь +- 每次 +- ュ +- 控 +- 阶段 +- ▁Kunst +- iff +- لى +- liga +- ▁جو +- ▁remove +- 老人 +- ▁mister +- 準備 +- ▁islands +- ▁erreichen +- 多い +- ▁characters +- ▁Bis +- ▁вся +- 一致 +- 祖 +- stieg +- ▁mom +- ▁само +- geri +- boy +- ▁Rwanda +- ▁estat +- 印度 +- ▁client +- wacht +- 收入 +- ▁Mes +- bles +- jä +- ▁acting +- ▁infrastructure +- ▁predict +- ổ +- щи +- ロン +- cido +- grav +- ▁мог +- ▁burned +- ▁gebracht +- ▁shell +- ▁australia +- cult +- 金融 +- ▁steep +- ▁rang +- 出生 +- 假设 +- ▁مر +- ▁anirà +- сен +- ▁まあ +- 这是我 +- ▁되게 +- voj +- —— +- ▁organic +- छ +- 的影响 +- ▁gather +- 对话 +- 很重要 +- ât +- ffen +- dauer +- ват +- tons +- 跟着 +- 并不是 +- 今晚 +- ▁Sinn +- ▁nghĩ +- ▁Bericht +- ▁É +- cono +- lose +- 辞 +- こちらです +- ▁일 +- 爱情 +- だな +- 说明 +- ях +- ▁tag +- ▁اون +- ▁electric +- 昔 +- ▁commander +- 凡 +- கு +- ▁chin +- det +- 続け +- 
▁fy +- 比如说 +- ▁thrill +- 一千 +- 证据 +- だったら +- ▁яны +- ▁causa +- ▁insult +- ▁fou +- час +- 实际上是 +- 莎 +- ▁Rück +- ▁vessels +- ▁junge +- ▁eens +- ▁inclined +- ▁opposition +- 計 +- ▁Forschung +- spec +- ▁unfortunately +- ▁towns +- ▁الر +- 提到 +- ▁shirt +- ▁сделать +- 执行 +- 野菜 +- 快速 +- hole +- ▁இரு +- isiert +- ▁ella +- ▁Richtung +- ▁wedding +- ▁Viele +- ▁leadership +- 義 +- 你觉得 +- 销 +- 逃げ +- ▁ali +- ▁institutions +- nā +- aven +- ▁billy +- ▁schrecklich +- licher +- ▁icyo +- ब +- ▁descend +- 答应 +- 判 +- eck +- ▁gihe +- 注意到 +- 情感 +- dha +- '35' +- ▁undertake +- ▁extrem +- 拼 +- ▁Unterschied +- won +- ▁equipment +- لم +- 多く +- år +- lässt +- ▁philosopher +- ▁uttered +- 毕业 +- 主任 +- 星期 +- 施 +- ▁sed +- маг +- 再见 +- ▁Yu +- 我跟 +- 长的 +- やった +- いか +- 发现了 +- くん +- ▁shared +- 租 +- ların +- ▁doubtless +- aires +- كل +- යි +- 가지고 +- ソン +- 的吗 +- ▁Einige +- ▁vrouw +- 不够 +- ▁intense +- fol +- ▁glow +- ▁permitted +- ▁pis +- quelle +- ёт +- 成长 +- طور +- lib +- 跟我说 +- 最後の +- 価格 +- 逼 +- ▁settle +- 结构 +- stände +- κ +- fallen +- ▁negro +- ▁♫ +- chin +- ▁лучше +- য়ে +- schein +- нул +- ▁While +- ike +- ▁tijd +- ▁học +- ्या +- ▁emotional +- 模 +- city +- zed +- ▁passat +- ▁avons +- 래 +- tah +- ▁человека +- 是一 +- говор +- ҡы +- ▁desde +- ▁deny +- ▁obligation +- ভ +- رق +- 異 +- stad +- ▁providing +- 专家 +- ▁Ag +- zak +- ▁Sand +- ▁lighted +- ▁baba +- hall +- ografi +- ▁nella +- stab +- ▁substantial +- иль +- கோ +- ▁vague +- 一方 +- ▁cease +- ▁Erfahrung +- 严重 +- λ +- 的眼睛 +- ▁ありがとうございます +- 去找 +- ▁historical +- олог +- 踏 +- ▁Name +- 意识 +- 何で +- 将来 +- 域 +- یس +- どういう +- したり +- gura +- ▁curve +- ▁Aufgabe +- ▁fence +- ▁dancing +- ▁flung +- ▁investigation +- folge +- ▁evolution +- ▁და +- ▁Мо +- گە +- ▁conserva +- нии +- ▁load +- ▁nhiều +- ▁parole +- 尊重 +- ',"' +- psy +- 効果 +- ▁egypt +- geht +- たちが +- ulu +- ús +- 表演 +- 候 +- そういった +- ▁speaker +- ▁East +- ▁utiliz +- 记忆 +- ▁wooden +- ▁beloved +- 一度 +- ▁allows +- ▁pursue +- keeper +- 環境 +- leh +- 胜 +- ▁phase +- 긴 +- 更好 +- 没想到 +- ím +- tě +- ▁lecture +- ▁응 +- 订 +- 犬 
+- ▁vorbei +- ▁Mü +- ▁lately +- nosti +- ▁bod +- 一定会 +- programm +- ▁arme +- ▁irish +- ữ +- 恐惧 +- 생 +- idade +- 按照 +- 佛 +- ▁Gesetz +- ▁rabbit +- 钟 +- ▁wilde +- 国民 +- west +- ה +- 我々 +- 慢慢 +- 刑 +- 頼 +- tun +- 搜索 +- note +- ▁grandfather +- larını +- ▁decisions +- ▁murmur +- ▁terra +- ▁할 +- ▁explore +- ▁Ven +- していて +- கள +- ▁TED +- tivo +- 文字 +- メンバー +- bà +- 驚 +- ▁staring +- 電話 +- 满足 +- 支付 +- pac +- ▁cliff +- erson +- のため +- 穿过 +- 插 +- пал +- ▁trunk +- ▁league +- ▁acest +- ▁Tan +- orient +- cele +- ▁desperate +- ▁彼は +- tical +- ना +- ▁expensive +- ڕ +- 适 +- ▁stuck +- ▁Stunden +- ▁dared +- ▁Jahrhundert +- gres +- ஹ +- ▁cheerful +- かどうか +- oor +- 議員 +- 更新 +- 益 +- һы +- ▁Rest +- 分かって +- 创 +- ▁après +- ▁إن +- иде +- Q +- ông +- ▁obey +- しても +- iad +- tje +- nner +- zimmer +- ますよね +- ▁sistem +- ▁workers +- ▁거기 +- 员工 +- ▁spain +- ▁nad +- ▁rend +- ▁observation +- ▁llama +- ▁volumes +- 地下 +- 北朝鮮 +- ந +- gir +- ▁declare +- ▁upward +- 極 +- طر +- ▁deserve +- ▁klein +- ▁defence +- ▁increases +- ▁simp +- ▁tramp +- 台湾 +- 对他 +- bord +- ▁camera +- ancy +- ▁jacob +- ▁kwe +- ată +- 问题是 +- 実際に +- ▁Sche +- welt +- ▁Arten +- gla +- ▁Bürger +- የ +- ộ +- ▁Worte +- ever +- 梦想 +- bec +- ▁begged +- ▁wretched +- ▁hơn +- ▁хотя +- 些 +- ስ +- ант +- ▁sending +- 降り +- ▁gilt +- lando +- że +- ▁India +- ▁questa +- grün +- 手を +- qi +- ▁Mel +- ▁assure +- 这意味着 +- あって +- смотр +- iber +- ナー +- шка +- ця +- ▁entering +- ▁prze +- tak +- ▁hervor +- ages +- ▁offset +- اج +- ▁今日 +- ▁cũng +- ▁consumer +- zzi +- 신 +- ▁adding +- ▁virginia +- ▁capture +- ▁pistol +- ▁curl +- ▁accounts +- 律 +- ▁Zahl +- rut +- なん +- 紧张 +- ▁pursuit +- ▁Michael +- ası +- ▁Far +- ▁Berg +- itza +- stück +- mira +- ▁besonders +- ▁enhance +- fulness +- dhi +- யாக +- ▁چې +- ube +- 肩 +- ▁jews +- ▁أنه +- ▁ils +- ▁swear +- ▁erreicht +- ▁sentiment +- ▁reaching +- илась +- かり +- ▁cents +- 중 +- 让她 +- ▁volta +- ▁yell +- 今天的 +- ские +- ▁expansion +- ▁notre +- ▁torture +- ▁purposes +- 她们 +- ▁disgust +- ެ +- ▁aktiv +- ▁highlight +- yol +- ้ +- ▁pit 
+- 所以我们 +- 力を +- ▁magazine +- ▁America +- rance +- 它会 +- 崎 +- ▁saddle +- ▁pont +- 来た +- blau +- ક +- tama +- ვი +- ▁Richtig +- ▁depart +- dag +- ▁只要 +- ▁Trans +- lagen +- ▁кан +- になっている +- 思って +- 不起 +- ▁indulge +- polis +- 鳥 +- ▁четырнадцать +- ▁тэр +- ভা +- gabe +- ▁langsam +- በ +- ▁должен +- ▁leverage +- ▁bench +- 摆 +- ▁distinction +- / +- 大雨 +- 地上 +- ने +- куп +- ▁biết +- действ +- tore +- east +- ▁brook +- brach +- issa +- 해서 +- ▁lesen +- ▁Erde +- ▁bari +- ▁quelque +- say +- sinn +- ▁vend +- 階 +- rand +- نە +- ▁damals +- рак +- ▁recovered +- 過去 +- の大 +- ▁verd +- ники +- 运行 +- ▁begins +- дум +- ▁mbere +- 圧 +- ▁alice +- 范 +- ▁cutting +- ▁rescue +- бар +- 奶奶 +- ▁incredible +- 理论 +- ▁grain +- ▁habits +- なし +- ▁merry +- ▁جا +- ったり +- уме +- 现代 +- نس +- ▁whereas +- ▁cloth +- ▁havas +- ää +- 时期 +- 启 +- ▁gesture +- ▁belonged +- ▁Ко +- تە +- ▁случае +- jet +- 拿着 +- ▁journal +- ▁rien +- 暮らし +- 活躍 +- èrent +- 得多 +- 合わせて +- vuga +- ▁lodge +- 是他 +- ▁gingen +- தே +- ロシア軍 +- してきた +- يس +- ação +- ата +- 一方で +- ▁tat +- 観 +- 一名 +- ▁بد +- ▁irre +- ▁خیلی +- 過ぎ +- ▁sowohl +- 歌曲 +- nett +- ▁früh +- ▁Та +- ▁гэж +- 宋 +- 荒 +- 掌 +- kad +- ▁Reise +- ▁dread +- ▁fog +- 没错 +- bled +- うわ +- 积极 +- ▁Verbindung +- ▁sensible +- ▁mounted +- ▁interes +- mur +- 设备 +- 音楽 +- '2000' +- ▁никто +- rug +- ಾ +- We +- 承 +- ▁shadows +- ▁vin +- nung +- 被称为 +- ▁Weißt +- ▁arrow +- jal +- زه +- ▁misery +- 狼 +- lets +- 的机会 +- wig +- glich +- 遗 +- fehl +- ▁komm +- ▁なるほど +- 信任 +- ▁brachte +- iel +- ▁editor +- ▁Alter +- 他对 +- ости +- ▁gegeben +- над +- جي +- نم +- gard +- lja +- 傻 +- лав +- ť +- 、3 +- ▁padre +- 厳しい +- 我不能 +- んじゃない +- ▁observ +- उ +- ▁prepare +- 珠 +- tanga +- でございます +- ▁bowed +- ▁Super +- '27' +- iers +- ись +- ▁Sen +- 不如 +- ▁jumped +- ▁Gal +- ▁permet +- 仕 +- 很少 +- ރ +- 投げ +- 了这个 +- ▁troubled +- ▁sufficiently +- 我不想 +- hun +- ▁cars +- rzy +- ▁그래 +- 欢 +- gelijk +- erlei +- ▁whence +- рук +- și +- ▁Martin +- stelle +- ▁Van +- ▁باید +- 塩 +- ▁schl +- 采取 +- ング +- 让人 +- 为什么要 +- ▁lamb +- 진 +- ▁fired +- 
▁Wissenschaft +- 沿 +- 多了 +- daki +- れない +- ▁señor +- dura +- zahl +- ▁zeer +- ▁external +- ▁كانت +- mour +- 失去 +- ▁Vereinigte +- ▁муж +- ર +- cchi +- فعل +- ▁schreiben +- ▁Deshalb +- ▁falls +- ▁disc +- ▁drum +- ▁нибудь +- ▁рэ +- 革命 +- ▁faut +- 離 +- みると +- ▁dalam +- ▁Eis +- ▁helpless +- bbi +- 闻 +- ▁голос +- 的脸 +- ▁schöne +- 电脑 +- ▁scorn +- 没什么 +- 正解 +- spann +- ▁Danke +- 飞机 +- ▁runs +- ▁includes +- assa +- ▁gedacht +- ▁fins +- verse +- aged +- enzi +- bé +- mento +- ▁Arbeits +- fang +- ▁straw +- ▁Schul +- 訪 +- ▁experiences +- ▁beam +- ▁rendered +- データ +- ▁lesson +- おり +- ▁verse +- ▁mirror +- زا +- ▁cheek +- カメラ +- ▁Gegen +- ял +- 部队 +- 穿着 +- ▁leads +- ત +- ▁technical +- ▁conceal +- ▁zeal +- ▁authorities +- ▁losing +- arri +- 我们是 +- ▁wagon +- ▁Bett +- ▁reports +- ▁weapons +- spel +- ▁kick +- ▁그게 +- ▁assumed +- ▁qualities +- あげ +- 魅力 +- ▁dwell +- マイ +- ▁peak +- 見せ +- ▁jug +- ▁endlich +- ▁tio +- 我没 +- ▁stretch +- À +- falls +- ្ +- пі +- nih +- ▁aufge +- 公開 +- аз +- lju +- گاه +- ▁eve +- 谈谈 +- მა +- ▁namens +- ▁করে +- ジェ +- وو +- aging +- tered +- вай +- ▁zit +- とこ +- ස් +- iah +- ▁discipline +- ര +- 미 +- Re +- bara +- ▁exposed +- شت +- ▁infant +- ▁Ä +- ▁email +- ▁owe +- 期间 +- ▁icy +- 一句话 +- 徐 +- ょ +- ▁yara +- ▁Ana +- ▁carbon +- 行了 +- eja +- あす +- ▁pile +- ▁planned +- kita +- ▁railway +- ▁знаете +- 一句 +- 製 +- üb +- 今夜 +- ▁ahora +- ▁naj +- ?」 +- dou +- şte +- 正是 +- ▁ultimately +- 沖縄 +- bahn +- vine +- 减少 +- ▁پس +- ▁Washington +- ▁considering +- ▁Era +- ▁persona +- nimmt +- 有时候 +- ▁bara +- 用的 +- ▁tomorrow +- ifa +- 他们是 +- 看了 +- を中心に +- front +- ayı +- ▁primary +- 的心 +- 过程 +- ▁spur +- ▁distribu +- 要么 +- уш +- ▁nk +- 戦争 +- fir +- ▁измен +- 案件 +- ▁neighborhood +- 意識 +- ▁drunk +- ▁よろしくお願いします +- 黑人 +- ▁tran +- ▁мир +- ▁extend +- ▁USA +- 神经 +- 时候 +- ▁그렇게 +- که +- ▁raw +- nus +- 学会 +- mpel +- impo +- 军队 +- ▁Gericht +- arra +- ▁winds +- ▁jungen +- ప +- ▁Lassen +- ▁деньги +- ▁sabe +- ▁mobile +- пло +- ති +- ▁cub +- illo +- ecek +- ▁こちらは +- ▁Gri +- ി +- ▁rằng +- ▁Mil +- piece +- 
▁mucho +- 处于 +- ▁pier +- ▁aloud +- ▁Gold +- ద +- ▁Hilfe +- قي +- ▁americans +- geno +- ▁gates +- vio +- 早期 +- ▁separated +- ▁நீ +- ન +- 出す +- 最新 +- ▁mam +- illes +- ▁ведь +- ▁avant +- ▁заб +- уй +- ▁এই +- 更好的 +- key +- ▁kannst +- ▁quantity +- ▁свое +- ▁toujours +- ▁ages +- ു +- ませんでした +- ▁opinions +- ▁images +- Se +- 窗 +- setzung +- 开放 +- 就能 +- みて +- tritt +- 辺 +- range +- 運動 +- ▁wander +- вет +- 費 +- ▁haste +- 正式 +- ▁sexual +- ṭ +- ▁Gedanken +- ▁reli +- それに +- ▁100 +- ▁weird +- ದ +- 细胞 +- ▁همه +- ▁portrait +- ▁chill +- 生活中 +- чер +- 纽约 +- estra +- ▁sera +- 全く +- anz +- ▁слова +- ▁County +- もん +- ▁det +- وج +- ▁deed +- ▁claims +- ▁delightful +- ▁vroeg +- ▁яе +- ข +- 別の +- ▁тех +- 면은 +- ark +- ▁scattered +- ▁gazed +- rado +- гром +- वा +- Any +- обра +- 仙 +- ality +- ▁missed +- ▁prin +- 宣布 +- ▁flatter +- 都没 +- ▁essere +- ▁personally +- 子里 +- 後ろ +- 离婚 +- ▁magnificent +- ▁miracle +- ▁rolling +- ▁thread +- 述 +- ▁fuel +- ▁altar +- ▁قبل +- دى +- お母さん +- 行く +- ▁daughters +- өг +- ▁crash +- ະ +- ू +- ▁challenges +- его +- 哪个 +- 介 +- ▁rồi +- tron +- akt +- ▁ص +- undzwanzig +- ▁gens +- maid +- できます +- plaats +- ▁bosom +- ロシアの +- ▁mm +- ▁methods +- staan +- 兄 +- ection +- 握 +- ▁그때 +- ான +- كى +- せる +- بع +- colored +- ▁ĉe +- нда +- mbre +- ▁robe +- マスク +- ▁Mars +- ▁concert +- mov +- you +- ▁midnight +- ▁missing +- 分钟 +- ▁drag +- ার +- ▁joint +- nost +- つき +- ▁Ён +- ▁于是 +- sini +- ▁Jag +- ương +- 历 +- ables +- ▁restrain +- ▁africa +- ▁horizon +- 以至于 +- خت +- ▁reduction +- ▁совершенно +- ▁insisted +- 邀请 +- 欲 +- 坦 +- bru +- ▁Anfang +- ▁zeggen +- 우 +- ▁Feuer +- freund +- bwira +- 减 +- ண் +- ▁Afrika +- ▁außer +- 这么做 +- 宅 +- hearted +- legt +- 長い +- にお +- omen +- ▁trou +- ▁Brief +- ▁anna +- ▁vest +- ▁你就 +- ских +- ▁bestimmte +- ▁sia +- 练 +- ▁raising +- 竹 +- 规则 +- ▁ignorance +- cep +- dek +- ▁گفت +- ▁einzu +- 敗 +- ▁führte +- ▁хорош +- حل +- ▁이렇게 +- 成本 +- 撤 +- ▁consequences +- ▁factor +- 旁边 +- ▁patron +- ▁involve +- pas +- 风险 +- erie +- cap +- 宿 +- 够 +- ▁Ф +- 一生 +- られました +- ▁sink +- 
建造 +- ▁valor +- ▁против +- ▁cock +- ▁نیست +- 年龄 +- ティー +- 严 +- izza +- ▁момент +- mäßig +- ▁这是一个 +- ▁province +- ▁convert +- tina +- cup +- tiques +- ▁breed +- 干吗 +- rb +- gefallen +- ▁paint +- ▁foe +- ulation +- 沃 +- ▁kugira +- igh +- っていうこと +- ▁அவ +- plic +- koze +- 亡 +- ▁prote +- pok +- ▁primera +- ▁Sun +- ▁Volk +- ▁entra +- ▁другой +- ▁aufzu +- емся +- ▁cards +- のよ +- куль +- 改善 +- ▁perché +- 执 +- ▁Dingen +- ▁mainly +- wohn +- ドル +- ▁cet +- ヒット +- ವ +- ▁folk +- onde +- このように +- 劲 +- 明らかに +- ▁trên +- থ +- 旗 +- 봐 +- ަ +- ▁மா +- ▁significantly +- ▁chase +- ▁مثل +- 振り +- 当初 +- 儿童 +- ▁nam +- 会議 +- 剑 +- ӓ +- ▁Richard +- 날 +- ▁absent +- რ +- луч +- 载 +- ▁هناك +- ▁appar +- せて +- ▁Band +- ▁我不 +- 後に +- oedd +- De +- ▁минут +- 没有任何 +- 距離 +- 竞争 +- ▁Ö +- ▁Fahr +- gaben +- ▁15 +- sert +- 危 +- ▁кого +- 本人 +- ά +- ▁Air +- ▁Kultur +- yal +- ▁lust +- 漫 +- ▁developing +- '26' +- ▁boots +- あとは +- stream +- ▁ບໍ່ +- 弾 +- dress +- ▁quelques +- ▁Bewegung +- ▁messenger +- жен +- ▁roads +- ų +- ▁menschen +- gender +- deck +- ▁molto +- mart +- 学家 +- ▁basic +- 也不会 +- ▁nag +- チャ +- を受けて +- ի +- 有的 +- ▁firmly +- 提高 +- ▁одно +- 団体 +- ender +- ▁Bezug +- kura +- wort +- ▁inclu +- لار +- 作家 +- 話題 +- 住民 +- bura +- çı +- ▁crowded +- diği +- ▁outcome +- 有效 +- volle +- ▁May +- َّ +- ▁honey +- 行って +- чет +- boot +- 分かる +- یی +- 家伙 +- boat +- wara +- stimmen +- 公平 +- 発見 +- ▁Вы +- 的情况下 +- ర +- ▁sana +- ▁ram +- ▁тысяча +- ▁especial +- ▁hastily +- されます +- ▁Vielen +- 和其他 +- 或者是 +- ▁hai +- ▁nearest +- ▁multitude +- kraft +- үүд +- minister +- ivi +- ▁rất +- ▁plays +- ▁clay +- 销售 +- ▁porte +- 周围 +- wechsel +- ууд +- ▁erzählen +- ▁ernst +- ▁оно +- ▁系 +- 市民 +- ▁leaning +- ▁overcome +- 长期 +- melt +- ▁Groß +- үр +- ▁innerhalb +- ▁scrap +- cita +- ▁returns +- ▁april +- 奈 +- gehalten +- 应用程序 +- dela +- ▁kri +- amp +- 滚 +- ▁grasp +- ▁cách +- 软 +- ▁corps +- elde +- ▁lincoln +- ▁lee +- 全て +- ▁continually +- ▁Rus +- ▁خوب +- 一步 +- 人と +- 一九 +- ▁đầu +- ▁lokal +- ▁молод +- に対し +- ege +- ▁disorder +- ▁implement +- 吐 +- 默 
+- nick +- 足够 +- 者が +- ▁twin +- ▁между +- 餐 +- 下的 +- タイ +- iment +- 自分が +- ▁reveal +- ờ +- schlagen +- uki +- richten +- тельно +- 相关 +- ▁kadar +- ▁cosas +- ▁chinese +- ▁Paris +- ▁rebel +- 挑戦 +- ▁এক +- ▁humor +- ▁improving +- һа +- いただいて +- ваць +- ▁fühlen +- ▁cerca +- āk +- ▁ornament +- ▁Wahrheit +- 带到 +- 科技 +- 找到了 +- 真的很 +- euse +- ▁purple +- 聞き +- 外面 +- ▁deshalb +- ▁amid +- ▁Mac +- 疯狂 +- ления +- 颜色 +- ▁clau +- ுக்கு +- ▁continua +- 职业 +- ネット +- 張 +- کرد +- 浪 +- 的行为 +- kleid +- ல்ல +- ▁september +- ▁Schiff +- tava +- 软件 +- 遠 +- 也不是 +- ▁Este +- 思う +- ▁confirm +- tete +- になると +- 乌 +- istan +- どれ +- äre +- 皇上 +- ▁oath +- 愤怒 +- دل +- 朗 +- ▁apparent +- ồ +- ▁khác +- ▁shaking +- ▁acquainted +- ▁citizen +- stav +- ▁biz +- 陸 +- 得很 +- 权力 +- ▁lieben +- ▁fost +- ▁badly +- ▁King +- ▁mankind +- util +- ▁هیچ +- ҳ +- tele +- sucht +- 戻 +- Da +- ▁centuries +- ようです +- нар +- ▁urged +- чен +- matic +- ▁throwing +- ▁outlook +- ▁loves +- ▁religi +- を感じ +- OK +- gab +- ▁Nähe +- ▁issued +- zioni +- ▁đây +- iriza +- gesch +- 裁 +- aud +- ▁Universität +- さっき +- lika +- ▁studio +- рат +- 聴 +- чин +- 分からない +- ▁зада +- ▁afterward +- lerini +- ▁conta +- ましたが +- 逊 +- 트 +- ▁있는 +- ▁Tr +- ▁illness +- 見え +- ▁alter +- ▁sounded +- posta +- ▁Job +- ▁significa +- 下降 +- ▁третий +- nyo +- elen +- ▁کنیم +- avuga +- ▁poem +- ▁Sur +- 变成了 +- ख +- 再び +- sili +- 早就 +- نج +- greg +- 絶対 +- ▁First +- trac +- ▁photo +- 当年 +- greifen +- iĝas +- ▁wären +- ▁caso +- ode +- '45' +- 能不能 +- Me +- らしい +- ▁enthusiasm +- マー +- ▁bene +- 警戒 +- انه +- ừ +- krieg +- ř +- 爬 +- ▁شهر +- ▁preserve +- ▁Kur +- 震 +- ▁spear +- 浅 +- ▁bwo +- ▁moeten +- imba +- ▁comparison +- したこと +- ▁câ +- 食べて +- 胖 +- 拡大 +- ▁право +- ▁kw +- ▁الخ +- 蓝 +- 臣 +- 什 +- ▁recorded +- ▁rational +- ▁estimate +- 你知道吗 +- ▁thinks +- 見た +- ▁arrest +- ssy +- ▁trend +- 表面 +- eke +- ▁durant +- ▁contain +- ▁Dy +- ற்க +- ைய +- 一件事 +- 临 +- кә +- sicher +- 人都 +- ▁gathering +- ▁transaction +- coll +- ▁explo +- ▁maria +- ovan +- ▁当我 +- ног +- 什么事 +- ला +- 同事 +- ▁toch +- 議 
+- ▁vorher +- 说是 +- ▁Wat +- ▁genuine +- リーグ +- ckte +- jahr +- ▁madam +- ध +- مون +- タイム +- ▁sig +- scribe +- 购买 +- premi +- Pa +- ▁হ্যাঁ +- ога +- 识 +- ▁Fern +- 日子 +- 斜 +- 분 +- ника +- ▁hook +- ania +- ▁schwa +- ▁Zeitpunkt +- ▁год +- ▁meadow +- ▁charity +- ▁نام +- ▁column +- ministr +- ▁Kraft +- ঁ +- 的钱 +- ▁revealed +- 宣 +- ▁fearful +- ▁melancholy +- ▁daher +- 机器 +- ▁первая +- まずは +- vesti +- 把自己 +- ▁ansehen +- ▁lugar +- ▁Geschäft +- 交给 +- ▁leaf +- зо +- 腹 +- 虎 +- 'No' +- ქ +- ▁compliment +- 但这 +- ▁trot +- ▁lands +- ▁Fuß +- 報告 +- ொ +- 巡 +- iš +- ▁manners +- جد +- 治療 +- 等待 +- خط +- ▁brow +- ▁mock +- овая +- tico +- 生き +- ▁wet +- 怎样 +- passen +- lur +- ▁Yuba +- シー +- 不在 +- embro +- ধ +- ▁zonder +- ន +- Ü +- ▁craft +- nji +- 档 +- فی +- য +- ▁misfortune +- kaba +- вес +- ക +- flow +- ▁Pero +- さんです +- アウト +- чым +- 过程中 +- ppel +- これで +- cce +- вае +- ▁alike +- 阻止 +- しましょう +- ▁blu +- 嘉 +- ▁senior +- ▁stellte +- version +- 药物 +- ▁然后我 +- 忘 +- вала +- ▁gloom +- 复杂 +- 的节目 +- ▁mejor +- ča +- wende +- 演员 +- ▁borne +- センター +- ▁спа +- ▁ebi +- ▁requires +- 杜 +- ▁پا +- ▁fac +- 医療 +- pher +- 第二个 +- ▁switch +- yne +- ▁theatre +- 作った +- 信号 +- 体验 +- лам +- 你需要 +- ▁sixth +- ▁abandoned +- ▁tipo +- 類 +- ▁noted +- 规 +- ▁initial +- 上去 +- شار +- ▁charged +- 宇 +- 돼 +- ▁ciudad +- 当たり +- 为你 +- ▁cares +- އ +- 活着 +- ▁encourage +- 門 +- tures +- 간 +- ▁Би +- ▁referred +- erde +- ▁leaned +- дом +- ▁Blut +- ▁шо +- 非常感谢 +- انت +- 画面 +- ▁marks +- ▁flock +- ะ +- 以下 +- ▁công +- ▁saß +- ▁originally +- ▁sowie +- kti +- ▁encouraged +- cm +- psi +- ▁жизнь +- ▁hunger +- ता +- 我今天 +- 忠 +- stel +- уп +- 自信 +- قى +- ▁volen +- acco +- ▁acu +- ▁яшчэ +- ▁而是 +- lation +- aks +- nish +- angan +- 芸 +- гад +- 할 +- ям +- ี +- ogni +- ▁hoping +- 約 +- 紙 +- 出版 +- さんも +- ▁Hat +- ▁likewise +- 치 +- ▁contempt +- ▁Seiten +- ▁свою +- 我我 +- ▁wanting +- ไ +- య +- anze +- こそ +- ▁operational +- вести +- سر +- ▁gaan +- کس +- maß +- 民族 +- 仅仅 +- 阿姨 +- فل +- ▁тысяч +- rò +- DNA +- 工程 +- ▁stout +- 毎日 +- ▁senses +- 研 +- dil +- ▁fuhr +- 
aca +- 担当 +- ▁говорить +- ▁dispute +- rgi +- ▁landscape +- tima +- ▁الذي +- сте +- ▁brick +- بول +- jer +- َا +- ▁examples +- 对我们 +- ▁Hin +- ▁großartig +- ▁gir +- ▁engineering +- ▁cau +- ▁monk +- mona +- ځ +- 上班 +- 作者 +- ▁má +- ▁الط +- зов +- 伝 +- ▁chỉ +- dern +- ▁provisions +- '28' +- ales +- ▁Bal +- 创建 +- atory +- ▁introduce +- ▁назад +- bart +- 好きな +- mother +- üz +- ▁engage +- гон +- лад +- ▁champion +- 子的 +- 食品 +- 見える +- SNS +- ▁inferior +- ▁gratitude +- 九州 +- ▁trained +- ▁mensen +- 可怕的 +- 闲 +- 走吧 +- stern +- view +- 玛 +- 开了 +- 単 +- ▁finance +- ▁Gebäude +- führung +- 是这样 +- の影響で +- すでに +- 的关系 +- ▁personas +- ▁bride +- ▁occasions +- 自分で +- ▁priests +- と同じ +- 搬 +- imiento +- сем +- ▁То +- 呗 +- iche +- used +- стаў +- ▁chicken +- ▁Straße +- 好处 +- 是非常 +- hom +- 公司的 +- away +- ▁temperature +- ▁sob +- ▁darling +- ▁knock +- tick +- 术 +- 甜 +- franc +- ▁гол +- ▁Quan +- based +- 不得 +- しっかりと +- ▁artificial +- sburg +- メディア +- 大きい +- නි +- 增 +- ▁आ +- られている +- өөр +- tät +- 很大 +- ▁todas +- 工作的 +- aş +- 听说 +- 所说的 +- vali +- schied +- ▁objective +- سو +- 区域 +- じゃなくて +- avait +- likuwa +- 鹿 +- ▁laut +- ではなく +- ▁monta +- ▁swell +- 主席 +- ▁anderer +- ▁ellos +- 是吗 +- ▁determine +- 的第一 +- ▁Familien +- ▁threatened +- のも +- 灰 +- 我的意思是 +- ▁شو +- ▁utmost +- miz +- ▁nacht +- ▁arts +- 束 +- ▁commit +- ▁backward +- ▁efficiency +- ▁surrender +- ebe +- とともに +- نده +- 通知 +- ەر +- 这些人 +- baha +- ▁sonst +- git +- vale +- ▁зам +- iness +- ▁Preis +- お伝え +- ▁imagined +- 内で +- ▁visual +- 의 +- ▁Bedeutung +- んだろう +- まった +- чат +- wang +- ▁probable +- 他也 +- ▁conclude +- ▁Ili +- 財 +- 設 +- वि +- ots +- dium +- eerd +- ▁employment +- 的女人 +- ຮ +- 梁 +- 変わって +- 民主 +- quent +- 十六 +- uge +- ▁retain +- ச்ச +- ▁monarch +- ▁receiving +- かん +- utse +- ▁dreißig +- ▁Anne +- ▁employ +- оз +- ▁attempted +- ▁july +- ▁nào +- tief +- すぐに +- ēja +- ▁draußen +- ▁könig +- 我们也 +- رج +- ▁detective +- kup +- ▁geç +- そのまま +- ぶり +- வர +- ▁childhood +- кал +- ▁ciutat +- ении +- ▁bloom +- かも +- ▁wilt +- koj +- ▁poured +- んの +- 
▁hinzu +- 節 +- ager +- 附近 +- ▁femme +- 才是 +- cco +- ▁ramp +- によると +- buka +- ▁уг +- ▁eki +- ▁monument +- ▁contains +- 公众 +- öz +- ▁items +- Mo +- поль +- ▁kleiner +- 从来没有 +- が入って +- ▁instruments +- ▁thine +- ▁essen +- 侧 +- habit +- 桃 +- をお +- ▁suchen +- 敏 +- ▁Region +- shin +- تل +- ▁Tiu +- 尚 +- 葉 +- ators +- наход +- డ +- ▁могут +- zik +- ステ +- 态 +- wagen +- ▁recognition +- mez +- フィ +- ▁safely +- ▁Jack +- ija +- ▁телефон +- овка +- ▁commitment +- 跟你说 +- ▁wage +- зда +- සි +- тын +- 好不好 +- ▁мало +- ▁alleen +- ▁பு +- ▁hasn +- スの +- ってきた +- 悪い +- ▁relatively +- ▁Mur +- ▁Bru +- ▁relationships +- ▁rat +- るように +- ▁Doch +- ▁noon +- 同样的 +- ▁integr +- 简单的 +- ▁seal +- гляд +- ▁Uhr +- 我怎么 +- ▁dining +- ▁gebe +- culture +- 師 +- 他不 +- ▁kot +- ▁huis +- 帮你 +- 大家都 +- ▁solve +- fur +- ▁poll +- ▁jamais +- meri +- ▁cared +- bonne +- 思想 +- 数量 +- 一半 +- ▁furniture +- ▁territory +- ▁verbunden +- gaan +- Imana +- herr +- ▁estar +- ▁revis +- hale +- ▁Kü +- ▁lucky +- 账 +- ▁pursued +- の皆さん +- っていうのが +- ▁mondo +- 瞬間 +- スポーツ +- dě +- ▁ugly +- 秦 +- 資 +- ▁ولكن +- ▁gering +- пас +- 始めた +- ▁cries +- ありがとう +- 裂 +- ▁amor +- alis +- haber +- ë +- 巨大 +- гер +- たの +- kee +- وە +- ▁groot +- ▁प्र +- цев +- 新たに +- 谱 +- ары +- نفس +- ▁ваш +- ▁кры +- maal +- ▁mum +- iter +- ▁mig +- bá +- 盟 +- 向け +- ▁betty +- ▁никогда +- ▁кү +- гэ +- どの +- ブラ +- 北部 +- йн +- ▁peasant +- gren +- 很容易 +- 下さい +- ไม่ +- ▁Ek +- ▁Are +- friend +- ▁invisible +- evi +- ▁climb +- lek +- rika +- tua +- ▁terug +- эк +- ▁mijnheer +- ▁decir +- ▁Mill +- とおり +- ▁studi +- ▁shield +- 伸 +- assi +- ▁award +- 需 +- pira +- 禁止 +- ▁autor +- ▁contre +- ▁entend +- 给他们 +- ▁hugh +- '!"' +- ▁passa +- ▁erwähnt +- 公开 +- ▁zweiten +- iau +- ▁Ok +- mitten +- ৈ +- 喊 +- dle +- 来る +- inu +- ▁ئە +- ▁freely +- 汤 +- なんですよ +- ▁nuestro +- ▁couch +- ▁mistaken +- '400' +- ▁expecting +- 攻击 +- ▁landing +- 你对 +- 的就是 +- ▁größte +- ▁那就 +- 这对 +- 点击 +- ried +- ▁bowl +- 裏 +- და +- kah +- بط +- face +- lekt +- ▁urbo +- ▁solchen +- 距离 +- вели +- ▁drama +- をした +- chip +- اص +- 
mouth +- ▁statt +- antwort +- kho +- โ +- 她是 +- ▁literary +- ▁بیا +- fassen +- ▁michael +- ▁nights +- ▁jag +- 都可以 +- ▁Geschichten +- дын +- 出来的 +- yê +- 模型 +- ▁آیا +- ▁пятнадцать +- ▁belle +- ピー +- ▁Stra +- бит +- ▁merchant +- ▁russia +- 骂 +- ▁Amb +- 今は +- youtube +- 수 +- ▁grade +- ▁ɣer +- ც +- 撒 +- ▁pipeline +- 聚 +- ▁Möglichkeiten +- たのは +- ▁julia +- 容疑者は +- 微信 +- ▁얘기 +- くなって +- ▁umuntu +- sprung +- などに +- খা +- ▁vorstellen +- ▁decent +- 動か +- もいい +- 的力量 +- トン +- ▁acted +- ▁Personen +- 恐 +- 大型 +- ▁reproach +- エン +- ▁nhà +- ▁Heute +- に対する +- マーク +- ▁Street +- ▁toutes +- ▁hinein +- ▁двенадцать +- даа +- ▁ब +- ▁Ah +- ера +- ▁kama +- ▁hob +- ▁abzu +- 黑暗 +- ▁hated +- ▁steal +- ▁더 +- ▁yıl +- ▁事实上 +- volu +- ▁powder +- Ch +- klu +- 针 +- ং +- ▁Entscheidung +- ▁mail +- 话题 +- ▁otra +- rir +- ▁rhy +- ▁сразу +- 你可能 +- cija +- там +- dow +- 摸 +- rite +- ienne +- 感染者 +- play +- lick +- ▁denk +- ▁están +- 若い +- ▁verändert +- チャンス +- 红色 +- ▁bade +- ây +- mt +- nych +- 世代 +- ▁johnson +- ▁fears +- som +- 什么呢 +- 去做 +- ▁prompt +- 最終 +- ▁capabilities +- ுவ +- ▁Anti +- ▁harder +- 在那 +- ides +- ▁examination +- ▁invitation +- ▁Erfolg +- 小的 +- 定义 +- 测 +- ▁això +- 住了 +- 赫 +- ▁авто +- ▁apt +- ▁Roman +- wahr +- hard +- ▁irà +- ▁toute +- 会場 +- ▁spectacle +- 個人 +- 德国 +- حد +- 别的 +- けて +- ▁näher +- mpe +- 用意 +- ▁کی +- ▁bedroom +- ▁puc +- ▁nascut +- 一日 +- ▁30 +- ▁meantime +- ということは +- ▁Charles +- ▁shareholders +- 明白了 +- 국 +- 故 +- ыз +- තු +- 十一 +- usta +- ೆ +- ▁parla +- اخت +- ▁inches +- power +- 的这种 +- ずに +- berry +- 品牌 +- 引起 +- ▁historia +- ▁pret +- ▁fantastic +- 威胁 +- kru +- 当他们 +- ؤ +- esi +- ▁occupation +- rose +- ር +- ▁statue +- 決定 +- ясн +- ો +- ▁Ton +- ▁blij +- uɣ +- ogo +- ▁favorable +- 我从 +- ▁outline +- atur +- んじゃ +- ▁বি +- ▁conception +- ▁comun +- 大谷 +- меш +- 었 +- ▁session +- ▁fed +- 今まで +- ▁ridiculous +- 妹妹 +- бр +- ▁maig +- フランス +- ▁2017 +- ترین +- ▁глаза +- ▁её +- ▁Computer +- ▁vanished +- kauf +- ▁Unterstützung +- ▁gukora +- 做的事情 +- 业务 +- ▁schm +- 러 +- ▁gotten +- ▁pār +- ▁rece 
+- ▁zuerst +- கா +- 猎 +- ▁vergessen +- ▁weet +- ný +- ▁Japan +- ▁pointing +- ▁estos +- ▁unconscious +- だけど +- னா +- ▁permission +- هایی +- вел +- 一定是 +- 头发 +- ине +- 悔 +- ▁Zum +- ッシュ +- 杀了 +- 様子 +- ▁blush +- ▁sharply +- 做得 +- ▁befindet +- ▁cyf +- ▁football +- ▁alguna +- пов +- 泥 +- ghi +- 十八 +- ▁treball +- ▁inspired +- ▁pattern +- ▁crystal +- 効 +- ▁cultural +- ▁monster +- ▁Bücher +- ▁என்ன +- сел +- ▁pomp +- ▁これで +- 記 +- eyed +- mati +- トリ +- boo +- 庭 +- commerce +- pru +- ▁records +- 取得 +- culo +- ▁Mau +- ようと +- ▁Ban +- 特殊 +- ฉัน +- 采访 +- дра +- ош +- イベント +- 发出 +- ▁prayers +- ▁employees +- klo +- count +- ▁panel +- ▁crossing +- 擦 +- 这是我们 +- ▁Frank +- ▁kinda +- ghan +- reichen +- ▁shone +- có +- ▁tutti +- 真实 +- ▁avenue +- ちょうど +- kultur +- 在我的 +- ▁iyo +- ▁fetch +- essen +- ▁detect +- ▁England +- ▁село +- ▁난 +- ▁reaction +- 特定 +- 采 +- person +- 惊讶 +- ▁strangers +- ▁gemeinsam +- тыр +- ▁beard +- ▁gleiche +- ▁Africa +- 周り +- ▁сколько +- ▁steadily +- voca +- 两年 +- zira +- ▁startled +- ▁muttered +- 寄せ +- 你应该 +- 策 +- ▁jud +- 在他们 +- 闪 +- 개 +- ▁millor +- ▁vein +- に向けて +- 香港 +- 播放 +- ▁accurate +- 扬 +- 살 +- ▁Website +- ফ +- 覚 +- ▁whirl +- よし +- ope +- ລ +- 英雄 +- ▁我会 +- ▁gek +- фі +- 是怎么 +- ▁rap +- ますか +- ▁attacked +- ▁endure +- ▁tie +- يع +- rp +- rash +- ▁specifically +- ▁collected +- nahme +- uous +- න්න +- dru +- 稳定 +- oid +- と思った +- ▁каб +- how +- ▁以及 +- ▁anchor +- 行き +- ▁pia +- ingham +- ▁также +- 礼物 +- ▁Typ +- 理想 +- ▁lleva +- ないように +- 角色 +- uck +- ી +- ▁abstract +- ▁Steve +- Pro +- ▁secured +- ▁бес +- mist +- 生意 +- ▁consequently +- ▁vriend +- 的世界 +- ▁führt +- ໄ +- ▁railroad +- ▁ticket +- igis +- ▁Ji +- 道德 +- ▁disgrace +- 소 +- ▁compare +- 我必须 +- ▁wandering +- ▁knocked +- ▁tant +- liter +- ▁procure +- ▁oedd +- media +- ▁offensichtlich +- 都市 +- fed +- ▁続いて +- کار +- ▁resistance +- ▁ئى +- ▁menos +- ▁maken +- 空气 +- 제 +- stock +- 負け +- ▁disappointed +- fee +- 選手の +- ▁admire +- ่า +- ▁marie +- 立即 +- 質問 +- ▁directions +- eron +- ▁Jean +- ясь +- يات +- კ +- 汁 +- 兴奋 +- ▁egg +- 
كا +- ▁erinnern +- ▁Polizei +- ய் +- ▁È +- 吸引 +- iere +- ຍ +- ▁dip +- ▁starts +- ▁свой +- cir +- ক্ষ +- 兴趣 +- ्य +- fekt +- ▁Marc +- ▁coward +- 匹 +- ▁உள்ள +- ▁dalla +- とした +- ▁brutal +- ▁traf +- holen +- 肥 +- gian +- 鲜 +- 课程 +- 会儿 +- ▁resumed +- ຕ +- ▁территория +- schul +- ▁complet +- ▁sighed +- 限制 +- ▁elsewhere +- ança +- 质量 +- ノー +- ▁Versuch +- образ +- kwe +- ▁virus +- 天気 +- 筋 +- 的书 +- ▁timing +- 熟悉 +- 促 +- ▁дома +- 对了 +- ▁Damit +- 我给你 +- ▁Bel +- ối +- ▁Chance +- জা +- ルー +- 快点 +- これも +- anca +- ▁frau +- 周辺 +- ▁pierre +- 干嘛 +- ▁فکر +- 教堂 +- ▁確かに +- 香り +- 道理 +- agit +- 有多少 +- 指示 +- ம்ப +- ▁patch +- スーパー +- ▁tonight +- ▁Markt +- رم +- ▁Regel +- migrant +- miş +- sió +- 巧 +- kha +- Ha +- mination +- ▁astonishment +- ▁asia +- má +- kota +- дээ +- ▁Fehler +- ▁Dieses +- ▁persone +- cant +- ▁aquel +- 消防 +- ▁perfection +- cció +- rren +- 门口 +- fest +- ▁viz +- 粗 +- ▁nonsense +- schätz +- ができる +- 程序 +- ▁говорил +- ▁Clo +- siz +- ▁identify +- ós +- ▁omu +- 祝 +- 続き +- ▁род +- ock +- 表現 +- ▁slightest +- aĵo +- 豊 +- ▁пожалуйста +- ски +- ▁Lehrer +- ▁erfolgreich +- moor +- பு +- ▁players +- ▁vì +- ▁descended +- qual +- xo +- ▁hopeless +- ▁conducted +- 手术 +- ▁Ay +- ▁12 +- っちゃう +- 生活的 +- ▁dragged +- ▁بعض +- нет +- ▁inte +- мон +- ▁teu +- тө +- ▁liquid +- ▁hip +- find +- moni +- ▁State +- Е +- ▁shooting +- ▁legend +- 埋 +- ▁revenues +- ▁unglaublich +- abel +- слав +- ▁” +- 听起来 +- 所有人 +- ▁düşün +- 了他的 +- 残り +- fro +- ▁bend +- ▁wirst +- ▁akk +- ▁hitherto +- 成長 +- こうやって +- ▁dirty +- metri +- 分の +- ▁perceive +- ▁jur +- vra +- خا +- ▁năm +- 晴れ +- 酷 +- ▁poste +- лер +- lais +- ▁sav +- 认真 +- 毕竟 +- with +- ▁regional +- ▁scholar +- 苹果 +- 改めて +- 因素 +- டா +- кы +- ▁sembla +- zustellen +- ▁pupil +- ifying +- 缓 +- 手に +- 名前 +- ▁совсем +- 相談 +- ▁ал +- ▁kendi +- ▁promote +- ▁стороны +- 了我们 +- 发生的 +- прост +- ▁mateix +- ▁дума +- ково +- ▁Green +- 自动 +- lima +- もらう +- ели +- ▁departed +- ▁belonging +- ▁Star +- وز +- テーマ +- யின் +- ▁hesitated +- ▁Om +- 评 +- ▁Hoch +- 正确 +- 協力 +- ikan +- はどう +- ▁lane 
+- machen +- reba +- ▁ব +- 逮捕 +- 在美国 +- ▁гру +- osi +- '1000' +- 泉 +- ▁examined +- 方は +- ▁disco +- ▁Ale +- ▁terwijl +- ▁dig +- 避免 +- ▁leisure +- worm +- ▁depth +- 卢 +- ▁worried +- 做好 +- 出身 +- 生まれ +- ▁قرار +- 废 +- ám +- zzo +- culp +- ▁Gebiet +- ibya +- ач +- ▁scenes +- 王子 +- gelegen +- 基于 +- ▁partie +- lhe +- ▁belt +- রি +- änder +- ▁compassion +- ♪ +- 下了 +- ями +- ▁devotion +- ろう +- 他の +- ケット +- ▁monitor +- ▁häufig +- 検討 +- tö +- enta +- ▁sido +- 補 +- 聊天 +- 我一直 +- ▁гэтага +- ▁называ +- 蒸 +- ▁当你 +- shyi +- houden +- conc +- ▁device +- この日 +- 凉 +- ▁Del +- mica +- 剂 +- ドイツ +- 是要 +- 隣 +- ▁narrative +- ▁Medien +- ▁francis +- Р +- ▁beaucoup +- ▁whistle +- 她在 +- ▁kumu +- ▁droit +- 我们正在 +- nahm +- etta +- ▁fille +- Do +- ▁owned +- 通り +- もらって +- 写的 +- 疼 +- ל +- ām +- пра +- них +- gali +- shya +- ▁France +- ▁cord +- だし +- rab +- ▁estava +- ▁folgen +- 遺 +- 位于 +- ▁sä +- ▁Sport +- ▁kui +- 页 +- の方が +- nata +- ▁facing +- ▁பொ +- もない +- He +- ▁inventory +- 实验 +- uter +- raten +- Sa +- ▁drank +- قی +- 伺 +- ▁Barcelona +- ▁keeps +- ▁하고 +- ▁mult +- gni +- 금 +- ▁astonished +- ▁readily +- ▁unexpected +- ▁mug +- どうして +- ҙә +- 跡 +- еб +- ▁Hill +- 面临 +- onia +- ▁cinema +- čas +- 创业 +- ▁зап +- rücken +- ▁silly +- ▁facility +- ▁除了 +- 用于 +- ▁Monsieur +- ばかり +- ▁collect +- ▁انجام +- ▁gifts +- ▁doctors +- ▁emotions +- ▁republican +- ▁maintained +- ▁snake +- ▁سي +- 身份 +- ▁ней +- ▁models +- ▁gracious +- ibwa +- leigh +- بت +- ▁meines +- 警察は +- stall +- ▁percentage +- خوا +- ▁crop +- ▁ainsi +- 忘了 +- cide +- ▁killing +- ▁tool +- 的好 +- teria +- ▁mutual +- 剩 +- ▁wreck +- ▁chiefly +- ▁values +- ▁strengthen +- ▁cómo +- 같 +- 的能力 +- ▁wunderbar +- steigen +- ▁heir +- ▁2018 +- кат +- zī +- ▁அது +- ▁கூ +- breite +- ▁guter +- ▁duck +- 相对 +- 承诺 +- 叔叔 +- ▁bana +- 使う +- زار +- 组成 +- lö +- としても +- kap +- 操作 +- 物质 +- Ba +- 入って +- halte +- ▁tau +- ▁weapon +- ▁байгаа +- rok +- ▁graduate +- 心情 +- ブル +- ▁höher +- leu +- するため +- ▁vai +- ころ +- 五年 +- ▁landed +- 闭 +- ▁след +- ▁selected +- 放心 +- 秘 +- 等着 +- nol +- gling 
+- ▁对于 +- ▁lend +- ▁povas +- 券 +- ▁sources +- ▁Kampf +- はある +- ▁Gro +- ▁jemanden +- 要做 +- ▁peaceful +- як +- 规定 +- dolph +- 追求 +- univers +- 添加 +- ▁pump +- バン +- 特徴 +- 多年 +- easing +- 枝 +- ▁Louis +- ▁Rob +- 改革 +- 一本 +- ▁climbed +- ▁bueno +- ▁analysis +- важ +- シン +- ▁roger +- sein +- ▁riesige +- organ +- ▁plat +- ▁seventh +- imper +- ика +- ニー +- ▁breeze +- ▁slavery +- モン +- ▁Seg +- cru +- 这个时候 +- zte +- nici +- ▁grandmother +- 这是一种 +- 最後に +- 厉害 +- ề +- ▁usted +- ▁split +- 的儿子 +- 速度 +- gers +- ▁heavens +- شان +- ▁planta +- 奇怪 +- ▁Sed +- ▁yüz +- дай +- ▁saturday +- うれしい +- 奇怪的 +- 联邦 +- جو +- ydi +- ▁cyn +- ▁зах +- üste +- agne +- мол +- ায় +- ▁bwe +- бира +- toria +- 尖 +- 好多 +- undu +- ▁urban +- гч +- ыч +- 穴 +- ▁hiding +- ▁loan +- ແ +- ▁kant +- ▁complain +- ▁apa +- ▁точно +- 抜け +- 建物 +- ▁Win +- يب +- 재 +- 政府は +- ▁Professor +- ▁naked +- 期間 +- ▁essay +- 翔 +- ▁propose +- ▁circumstance +- めた +- 不见 +- tour +- 使われ +- гар +- 我妈 +- ▁zuvor +- 坚 +- mik +- 财 +- ▁museum +- ▁건 +- ▁engineer +- 日から +- uß +- ▁solar +- nın +- ▁gesamte +- ▁后来 +- となって +- 写了 +- ▁agreeable +- ▁canoe +- ▁rug +- ▁Hor +- جان +- 什么呀 +- ▁marble +- meister +- ەت +- ▁dorothy +- ▁confused +- ноў +- лат +- ffi +- née +- 否则 +- Applause +- ண்டு +- ▁complicated +- ▁guerra +- キー +- 你不是 +- ▁最后 +- class +- ▁swiftly +- lton +- 滴 +- ▁vollständig +- ▁будем +- ▁gleam +- ▁finest +- ザー +- ▁fiel +- ▁gusa +- ▁motive +- 時の +- ▁circ +- timo +- ▁こんにちは +- rated +- هی +- ect +- ▁moins +- ▁rested +- fau +- dden +- elijke +- 鼓 +- ough +- kub +- ▁quart +- ຫ +- 菌 +- skri +- まれ +- ▁Juli +- جه +- ▁dorm +- ▁selfish +- 混乱 +- ▁佢 +- なんですか +- ▁examine +- ▁insurance +- гай +- 一开始 +- ▁disturb +- cción +- 政権 +- 伏 +- 征 +- ▁Foto +- ▁stati +- 讯 +- xed +- ▁chemical +- ром +- ships +- ligen +- ▁halb +- ▁introduction +- ▁дальше +- ▁dish +- 联合 +- posa +- 积 +- 基因 +- ይ +- 商店 +- 粒 +- 不去 +- ▁Fran +- 彼此 +- ▁nieder +- ▁mache +- 都要 +- три +- 気になる +- ▁основ +- ▁achieved +- ▁نظر +- 込む +- زم +- 高的 +- Di +- 使い +- 某些 +- ▁Joe +- пам +- ▁cloak +- 投入 +- ▁captured 
+- 週 +- 就这样 +- сс +- 所谓的 +- ▁arī +- ▁spoil +- nuncia +- ▁ɣef +- word +- ▁materially +- ▁thực +- ▁öffentlichen +- ▁яв +- gum +- ▁ئۇ +- ▁quote +- ▁indicated +- ▁stor +- tare +- ▁kaufen +- ▁google +- ▁unterstützen +- 对自己 +- ▁wichtige +- ▁recovery +- mensch +- ▁Ehe +- ▁lessons +- neɣ +- 던 +- 怎么回事 +- octubre +- 頑張って +- أكثر +- ▁testing +- އި +- ști +- dır +- ▁byose +- ▁polite +- рова +- imana +- ▁exhausted +- ▁rivers +- を行う +- 帝国 +- oval +- ▁例えば +- ▁Rom +- 这不 +- ▁Yn +- ▁patent +- 動画 +- рас +- ▁mole +- 不必 +- cura +- ▁anh +- ▁überall +- 美国人 +- ▁disappointment +- ▁soup +- ktu +- ▁transformation +- ▁Verhalten +- 後半 +- ▁байдаг +- 合う +- ▁Sil +- ぜ +- 扎 +- trud +- ży +- ▁shepherd +- ▁возможно +- レス +- cier +- ▁sunshine +- ▁Telefon +- 愉快 +- ▁havia +- aşı +- のだ +- 进行了 +- 农 +- ▁continuous +- ▁públic +- ▁poz +- ▁давай +- 满意 +- 悲伤 +- lma +- ▁curse +- reb +- 空気 +- قال +- ▁player +- 安定 +- maker +- 的那样 +- ▁ntabwo +- ▁способ +- heben +- ▁hecho +- wide +- Ho +- then +- ありました +- ▁pendant +- ▁咁 +- 但它 +- 能量 +- ▁Himmel +- 十九 +- 有些人 +- 年的 +- ▁свои +- ▁practically +- ость +- ▁deserted +- 说他 +- ▁buying +- ▁thorn +- ▁holds +- 信仰 +- lings +- ▁glimpse +- ▁CA +- ▁Kal +- slav +- ҙа +- 思います +- 何も +- ँ +- regen +- ッチ +- druk +- قص +- ▁preferred +- 续 +- بة +- ▁мин +- fälle +- ▁travail +- ▁他是 +- ழு +- ▁matt +- ít +- eği +- 勝ち +- মি +- ▁ваше +- するのは +- ෝ +- 出した +- ▁bite +- 黄色 +- مال +- ▁preparing +- ▁há +- ử +- digit +- ▁World +- かせ +- ieron +- 答 +- 遇 +- カル +- ファ +- بن +- 화 +- ▁priv +- kaza +- ▁giờ +- ப்பட்ட +- ▁bé +- ▁چند +- kut +- ▁darf +- ▁دوست +- 具有 +- fes +- еть +- ▁Cette +- konstru +- ▁thứ +- ▁jako +- ছিল +- organisation +- ▁Essen +- ▁surprising +- 渴望 +- 资 +- 浦 +- isieren +- ▁байв +- ▁Ideen +- స +- ▁größer +- besitz +- bour +- ән +- werfen +- ▁Ebene +- dah +- 美丽 +- 监 +- ドラマ +- じゃん +- ▁калі +- 独自 +- 時期 +- ▁cose +- ▁چیزی +- ▁joining +- ▁confession +- 天天 +- ▁processes +- 보 +- ▁比如说 +- 也就是 +- ーム +- 是我的 +- ▁Stil +- スピード +- 少年 +- ▁fühlte +- 予 +- vig +- ики +- ▁nap +- ▁angels +- ▁schedule +- rán +- ▁boston 
+- ▁oude +- まま +- душ +- 腾 +- ху +- 咖啡 +- валі +- ▁лес +- 刷 +- ▁suspected +- ▁inquiry +- ▁amusement +- ordnung +- ▁Klasse +- 食べる +- 我记得 +- 连接 +- فق +- ▁precisely +- ープ +- ▁왜 +- ▁Thomas +- 場合は +- schlag +- னர் +- ▁instructions +- turi +- ▁passieren +- ▁medium +- 姓 +- ▁укра +- にする +- wasser +- ▁cigar +- ▁pessoas +- ▁thirst +- ▁gö +- がこの +- ▁norman +- geni +- 拜 +- rina +- اک +- 出现了 +- 纯 +- ffin +- 忘れ +- 戦い +- 效果 +- こういった +- kõ +- ▁مق +- ▁روز +- 妙 +- ▁jones +- ▁During +- 碰 +- 这条 +- っちゃ +- рон +- pack +- gina +- が起き +- ▁sins +- gelassen +- prä +- ▁Wand +- istes +- plat +- ▁telephone +- conf +- 桌 +- ▁shine +- 不知 +- جل +- ▁Red +- ▁pearl +- ▁prec +- 家の +- ▁Süd +- ▁într +- 牧 +- agu +- お話 +- ▁sym +- 意見 +- case +- ▁2016 +- ▁tenía +- ▁Netz +- lusion +- ▁sola +- なくなって +- 克斯 +- 习 +- енно +- ▁lucy +- 日間 +- 단 +- መ +- 肺 +- Sch +- 文章 +- ▁festival +- нае +- дам +- ppy +- ▁glaubt +- ило +- 漏 +- सा +- は何 +- rico +- 吃了 +- ▁première +- 移動 +- ▁River +- 穷 +- ▁sensitive +- 今週 +- ▁provides +- нуть +- 今度は +- 获 +- ▁attract +- ▁durante +- 桑 +- ▁கு +- を作って +- volv +- ື +- ▁armen +- 現 +- abwe +- 세 +- ých +- 隊 +- chet +- ▁euro +- kula +- ▁hoofd +- ▁visitor +- ▁nooit +- ятся +- にした +- ▁August +- 每年 +- ▁هست +- ▁colors +- ▁buri +- ▁definitiv +- ▁Wu +- かない +- eno +- গে +- ▁لكن +- egi +- ▁своей +- ▁Jordi +- pfen +- ▁solicit +- ▁goods +- ▁composition +- gine +- 第二天 +- までの +- ▁intellect +- ▁vow +- 现在是 +- ▁frown +- ▁furnished +- ▁обе +- য়া +- 收到 +- ▁sicherlich +- ▁setzte +- ▁harsh +- 専門家 +- ▁Тэр +- 社長 +- 的一切 +- ハー +- 迅速 +- hend +- られています +- 毁 +- ന +- 順 +- ▁lunga +- 最初の +- anta +- ▁expectation +- ▁أي +- ью +- ▁дээр +- ▁মা +- udo +- ▁jail +- 计算 +- ▁나도 +- お父さん +- fä +- '*' +- ▁Studenten +- ▁Gut +- ▁你可以 +- 合同 +- рус +- zam +- rul +- 帅 +- ▁Why +- ▁moderate +- 块钱 +- හි +- alia +- ▁timid +- ▁cigarette +- 没关系 +- ologist +- rufen +- legi +- دید +- ▁முடி +- 敷 +- 方も +- 刺激 +- 误 +- ▁fortunate +- ▁crow +- kehr +- ーション +- ▁nut +- 你认为 +- frei +- ▁mình +- ▁existed +- 烈 +- lij +- agost +- ▁partnership +- ู +- ▁ده +- ▁così 
+- ▁hebt +- fire +- ▁país +- druck +- Ә +- ▁demon +- ▁improvements +- 这样一个 +- ▁jealous +- 化学 +- 連絡 +- 医学 +- huit +- ▁transfer +- گیر +- ▁favourite +- 强烈 +- お伝えします +- 卫生 +- ▁jerusalem +- ঘ +- ارت +- 化的 +- ▁parallel +- ções +- 概念 +- 我们知道 +- anu +- したと +- ▁我不知道 +- 候補 +- 在他的 +- iens +- 적 +- مى +- ▁Jud +- に戻 +- ▁Gleich +- ▁dynamic +- ▁arrested +- 母親 +- ▁establishment +- 何を +- ▁père +- ▁Regi +- لف +- 科学家 +- ▁xwe +- 갔 +- ▁Fan +- ▁television +- bach +- zes +- 数学 +- கே +- ってくる +- ớ +- ▁граждан +- 下一个 +- ▁стран +- loj +- ▁是啊 +- ▁masters +- лез +- ▁какая +- オープン +- ▁option +- ັ +- ▁identity +- ▁submit +- ▁jusqu +- 抑え +- gefühl +- ▁medio +- bira +- 我应该 +- 勝利 +- ĝis +- ▁handkerchief +- 夫妻 +- 孩子的 +- ▁vue +- ▁sober +- 阵 +- ▁lightly +- clos +- ▁laten +- chie +- şti +- 那个人 +- informa +- rough +- ▁links +- ▁distinguish +- tego +- ▁civilization +- ▁jaar +- ▁drugs +- ▁convenient +- いると +- ನ +- ▁например +- dock +- ▁disturbed +- würdig +- ▁deutlich +- hri +- destin +- 免费 +- zina +- ▁revenge +- zogen +- чка +- 瀬 +- ▁concealed +- rwanda +- ▁باز +- fahr +- ilo +- 한테 +- 奶 +- いるんです +- аба +- 得点 +- institu +- нан +- ▁hedge +- ▁Every +- 違い +- ▁Ly +- ▁بس +- свя +- 你这个 +- ▁dressing +- 猛 +- 俗 +- ச்சி +- ▁Ang +- 问我 +- ▁بزرگ +- ▁chapel +- ходит +- ▁kiu +- ений +- 我跟你说 +- 堡 +- utter +- 躲 +- ▁ໄປ +- ▁fabric +- ▁frankly +- を出 +- 漁 +- 队长 +- ▁هل +- 的歌曲 +- 쫌 +- baar +- 还有一个 +- ▁boast +- ▁Pf +- ▁bunu +- ▁Kas +- сол +- rock +- 元気 +- お願い +- ▁Santa +- imento +- 抜 +- ▁чувств +- ණ +- 書いて +- ▁dwelling +- 聞いた +- ▁Laufe +- brechen +- stol +- 也在 +- раж +- 给她 +- ▁arrangement +- ▁Са +- zat +- jung +- '29' +- minded +- ▁vivid +- ▁18 +- 走到 +- 去看 +- ▁weren +- うち +- ▁Mir +- 背后 +- abilir +- ホームラン +- ures +- 上がる +- nischen +- bridge +- lara +- hound +- ▁technologies +- ▁accompany +- დ +- tempe +- ▁cream +- ▁anticipated +- même +- лей +- ▁Linie +- が行われ +- ▁Pres +- 言った +- past +- ruf +- ▁trait +- uche +- 모 +- ▁scotland +- ▁struggling +- ▁vậy +- posit +- 患 +- ミサイル +- ▁preserved +- ▁зачем +- ▁defeat +- avu +- ▁aby +- ▁pint +- 
iji +- leute +- known +- 済 +- サービス +- ▁ёсць +- 実際 +- 的国家 +- ▁são +- ▁copper +- 宾 +- ▁Partei +- ziel +- ▁illustration +- ими +- politi +- ハン +- ▁US +- bry +- ▁verge +- öd +- 法官 +- itud +- 番の +- 差不多 +- 很有 +- ▁jury +- ▁Pas +- 知道了 +- ▁объ +- edifici +- 雨の +- 类似 +- ▁stamp +- stimul +- 我们应该 +- ▁monte +- ީ +- 称之为 +- 细节 +- ote +- ▁номер +- ライン +- 日中 +- 赌 +- ctive +- 页面 +- gion +- ▁creative +- 始 +- ▁Venim +- ▁beings +- schloss +- ▁influ +- 它在 +- ▁гэтым +- 传播 +- 子どもたち +- ▁Vous +- mış +- ▁sino +- fach +- ▁Stelle +- 対象 +- 个月 +- big +- ▁autumn +- chron +- ▁давайте +- ▁eaten +- ▁crept +- цэ +- 値 +- ▁defense +- 激しい +- ▁echo +- nelle +- ▁owing +- ▁Black +- ▁moest +- ▁behalf +- ▁происходит +- impa +- ▁minimum +- niz +- ▁investiga +- 着急 +- ▁october +- られます +- ▁virgin +- ▁lean +- ළ +- ▁ответ +- えた +- ▁звони +- いけ +- 享受 +- äch +- 你们的 +- onge +- ▁Hände +- trat +- ▁outward +- 这里的 +- ▁notwithstanding +- ▁volunteer +- ▁Smith +- чная +- 去吧 +- 湾 +- 帯 +- きている +- вез +- 制限 +- maya +- どうですか +- ▁clothing +- 性格 +- ▁Baby +- bbing +- වා +- 花了 +- аю +- ▁entwickeln +- ▁approval +- ▁irrit +- 他没有 +- ▁Alles +- ▁associate +- ▁kenne +- ▁bolt +- 永远不会 +- lug +- ▁perfekt +- gué +- nutz +- 想知道 +- ▁bestimmten +- ▁nail +- ビー +- ыми +- ▁hatred +- glia +- 保存 +- ▁answers +- ▁hefyd +- 不懂 +- 催 +- aeth +- ▁math +- 的一种 +- ounce +- eco +- қа +- ĝa +- regel +- うん +- ▁betrachten +- பா +- nyama +- 起き +- 玛丽 +- ▁Fort +- 願 +- ▁Chúng +- wald +- moto +- ▁sustain +- 명 +- ▁founded +- ▁bringt +- 工资 +- undi +- Emp +- ▁isso +- ▁pris +- と一緒に +- eaux +- ủ +- aĵoj +- ても +- 海洋 +- ▁passes +- otto +- ▁Gesundheit +- կ +- 先週 +- unga +- зал +- ▁immortal +- ▁covering +- ▁wash +- ích +- bula +- казать +- wak +- issen +- 担 +- 泽 +- nico +- ▁hyn +- ▁react +- ▁мор +- mol +- ▁definite +- ▁decrease +- ▁scarce +- ▁beating +- print +- 強化 +- ▁Wind +- egu +- ▁справ +- леп +- gga +- ▁nhưng +- ▁poc +- 是最 +- ங்கள +- ▁compete +- かかる +- ▁territori +- hana +- 続く +- ground +- 経 +- 就是我 +- 買 +- وری +- 邦 +- ミリ +- ▁Gib +- nă +- ▁segui +- 种族 +- gung +- ▁formerly 
+- ▁gün +- ents +- யும் +- кие +- ▁Kindern +- ▁farewell +- ▁cotton +- пон +- ▁mechanical +- ▁enabled +- थ +- wesen +- ▁prejudice +- 과 +- ▁Organisation +- کان +- ▁beasts +- 敌 +- ▁breathing +- ▁đề +- ▁dues +- ▁grandes +- ▁letting +- wegen +- ▁provision +- ▁fishing +- ▁ongoing +- ŭ +- 地位 +- ▁그러 +- 和平 +- ▁länger +- ▁canada +- 过的 +- ▁alma +- ▁Pal +- cular +- வெ +- ▁elegant +- 司令 +- かね +- ▁interessiert +- ▁feu +- ▁violet +- маш +- ▁Landes +- ▁gazing +- gestalt +- 一个小 +- нат +- klar +- ló +- ▁gallant +- pä +- ▁tutto +- ▁În +- kore +- 正确的 +- lever +- ▁restrict +- ▁Lösung +- ▁applications +- ▁witch +- ительно +- yong +- 会在 +- تج +- 彼女 +- stärk +- たちは +- гре +- ▁Spi +- ▁tribes +- η +- six +- sitz +- komen +- rê +- ▁geboren +- ▁niets +- ▁prominent +- けどね +- 诉 +- 添 +- 发生了什么 +- tamente +- ▁unver +- 韩 +- ỗ +- ▁viņa +- heure +- ያ +- 兹 +- るんだ +- 巨 +- дж +- ඉ +- ři +- ▁upstairs +- ▁entertain +- цию +- ▁indicate +- kab +- ▁rarely +- 株 +- ▁imperial +- pression +- дем +- 应用 +- 形で +- crib +- ▁replace +- ▁beaten +- 阴 +- ▁আছে +- 平时 +- 立刻 +- 一群 +- ▁Außerdem +- джа +- ▁seltsam +- ▁ته +- 유 +- いわゆる +- দা +- ▁dimension +- ▁Bob +- 岁的 +- 思い出 +- open +- 孤独 +- ▁disaster +- களில் +- baba +- ▁touching +- をかけ +- ▁tribe +- kati +- 航空 +- ▁Nation +- ▁maxim +- 留在 +- 방 +- となり +- 手が +- ▁ähnlich +- ēt +- ▁strict +- いきたい +- により +- гән +- ▁wives +- ▁политик +- クラ +- どちら +- ▁Herausforderung +- 这一切 +- ▁sailed +- lash +- ▁hanno +- тка +- 月の +- ▁Vorstellung +- sible +- участ +- 倾 +- namen +- हरू +- 込まれ +- daw +- 揚げ +- っていく +- nsen +- ▁collar +- ނ +- hill +- ▁petite +- alität +- ▁grab +- ▁Kam +- ▁egal +- 绿 +- 事业 +- ores +- ▁bewegen +- besch +- ▁Titel +- 了她 +- きょうの +- 这两个 +- 危険 +- labor +- ▁funds +- 十三 +- 갈 +- ▁jaw +- ▁senator +- ▁doorway +- ationen +- ્ +- зар +- litz +- dó +- ▁определ +- ▁あれ +- ▁accent +- yana +- ▁wrath +- Li +- ▁moses +- arna +- ▁Hay +- ええ +- ▁interfere +- ▁Text +- ▁можна +- 臭 +- なんですけれども +- embra +- zó +- лич +- 描 +- 发表 +- ぁ +- itor +- ▁cheese +- ▁stato +- 돈 +- ▁Mitarbeiter +- ▁label +- কার +- 
▁spun +- ▁propos +- ▁speaks +- 在家 +- 赞 +- ór +- ▁finds +- 市で +- 眠 +- ነ +- flor +- ニア +- ▁мяне +- ▁اگر +- ▁operator +- tention +- ▁definition +- 見事 +- вра +- ▁büyük +- tribut +- порт +- 非常非常 +- вяз +- rov +- ▁walter +- oti +- ▁marched +- ▁restored +- ells +- ▁trug +- 好啊 +- onda +- ▁riches +- ▁Hoffnung +- 太太 +- ுடன் +- lob +- ▁belongs +- ▁hans +- 非常重要 +- ▁présent +- 分子 +- 某个 +- 讨厌 +- ▁attracted +- しまう +- ▁encounter +- nnes +- zzle +- レイ +- ▁attain +- ▁вами +- ▁mutter +- 开车 +- 바 +- ▁funeral +- щу +- اره +- ▁shiver +- 创新 +- ▁стал +- treiben +- 保险 +- ▁lightning +- ▁complaint +- ▁african +- 重大 +- 你跟 +- ▁같은 +- چى +- 对他们 +- ▁quel +- ▁sama +- ▁обо +- 遊 +- aves +- ▁manufacturing +- 汗 +- 教会 +- 做一个 +- ▁wid +- мент +- ▁november +- ▁месяц +- ▁elbow +- ▁tank +- ほぼ +- лем +- ति +- NHK +- кин +- ▁Bilder +- ▁zeker +- стве +- 一个非常 +- ▁curt +- ტ +- ▁asset +- 破坏 +- anye +- ▁differences +- ▁нашей +- clin +- ウクライナの +- 行われた +- ▁Shi +- dors +- ▁democratic +- ▁Obwohl +- صف +- هر +- ▁feeble +- leş +- 这种情况 +- ssin +- を受けた +- kai +- ▁robust +- ▁besondere +- ▁elected +- ▁Modell +- ▁seconds +- lauf +- ダイ +- ګ +- セット +- 時は +- Applaus +- 白人 +- ▁свобод +- 病毒 +- 损 +- ජ +- ▁Mat +- ▁executive +- ▁onto +- lag +- ▁важно +- ან +- ▁concerns +- ء +- ebbe +- ▁Tru +- ▁seed +- 推荐 +- ▁Staat +- 祭 +- ▁cleared +- მო +- eixen +- 让它 +- သ +- wiesen +- Ro +- train +- ▁acknowledge +- 取れ +- 風が +- 依然 +- ▁indifferent +- 人数 +- ▁generate +- ▁frost +- itaj +- ▁reminded +- ▁romance +- 隠 +- ▁ireland +- ▁Ха +- 最多 +- ▁layer +- 柳 +- 就有 +- ▁caution +- ▁bezeichnet +- ▁denied +- سە +- ▁profile +- 的位置 +- tato +- ▁politische +- ▁Dra +- 高度 +- ちゃった +- ▁Bay +- 成立 +- ▁stephen +- laş +- 変わり +- neb +- ält +- oza +- coloured +- рок +- ▁independence +- ▁traveller +- 接着 +- utu +- 许多人 +- ▁leather +- 这项 +- 进步 +- ▁exists +- kia +- 进一步 +- ړ +- ▁Char +- ▁English +- cional +- maakt +- ▁zá +- ▁wereld +- ▁дэ +- 他们都 +- ▁Sicht +- 出来了 +- وك +- ▁christians +- 種類 +- gone +- ▁говорят +- ▁danach +- ▁positions +- 问你 +- यो +- ▁daylight +- wall +- えない +- 不住 +- 
▁echt +- 脳 +- 搜 +- ▁consists +- děl +- 人在 +- ▁Adam +- umva +- ▁formula +- anzi +- ▁тысячи +- ▁casual +- ины +- ▁eighth +- plica +- guer +- 世界的 +- ▁استفاده +- ▁German +- ▁بیشتر +- ▁Ŝi +- ează +- 反対 +- ▁determination +- 时刻 +- iller +- 히 +- 不愿意 +- ▁republic +- respon +- alen +- liste +- ▁outer +- ▁echte +- 对待 +- 私たち +- ▁которых +- って言って +- 你先 +- ่ +- ▁aveva +- nym +- acions +- ▁然后他 +- ▁democracy +- 填 +- ▁гэты +- ▁Bruder +- ▁characteristic +- ▁Kris +- ▁neuro +- politic +- ▁visitors +- ໂ +- ▁gale +- tır +- ▁protestant +- ▁befinden +- ▁одиннадцать +- ▁Schi +- いまして +- öhn +- 연 +- そういうこと +- arbeiten +- лас +- ▁porter +- 做到 +- cun +- нер +- nette +- atan +- ▁zweitausend +- ల +- ట +- öö +- ▁relevant +- 鼓励 +- 很棒 +- fford +- ▁dios +- ▁Tak +- 个小时 +- 我发现 +- ▁Ul +- rating +- ▁creek +- ▁aggressive +- ▁трэба +- それでも +- 手里 +- へと +- بح +- нік +- 一个问题 +- 公主 +- 见面 +- ▁saa +- 始め +- 崩 +- ivo +- Mi +- ▁extensive +- ▁спасибо +- ▁properties +- ▁Asia +- ▁Gran +- 序 +- 애 +- posing +- 发生的事情 +- 想起 +- 十七 +- ▁Anwendung +- лө +- ▁Member +- トー +- 孔 +- ▁наши +- ▁span +- ▁swung +- ▁perception +- हा +- ▁ox +- 劝 +- ▁laying +- ▁carpet +- wch +- vē +- はもう +- 赚 +- ▁Demo +- mala +- daten +- მე +- quar +- ▁تمام +- anima +- ▁radical +- ▁власти +- anı +- ▁Ils +- 証 +- '00' +- hub +- ало +- の姿 +- るのが +- раў +- 読 +- '%' +- ваў +- 监狱 +- すいません +- ▁execute +- bred +- ▁profess +- ▁Fenster +- 不上 +- exc +- ▁torment +- 互相 +- ▁unge +- 着我 +- ரிய +- の一 +- ▁sci +- 設置 +- versa +- ▁größten +- vê +- ▁graceful +- popul +- 夺 +- ಸ +- 採 +- とにかく +- name +- ▁Partner +- あるいは +- ▁nuestra +- ▁impressed +- ức +- 对不对 +- IT +- 方便 +- pek +- 一周 +- ਾ +- ම් +- ▁California +- ▁assembly +- ▁bran +- жда +- 可以在 +- ▁lei +- 在她 +- 有趣的 +- ▁divide +- 直播 +- nemen +- の話 +- 求め +- 彼の +- ▁trends +- ▁veröffentlicht +- илось +- 助け +- க்கப்பட்ட +- ▁دې +- ▁rapport +- ▁tones +- nego +- 怎么说 +- ົ +- 也很 +- grat +- träge +- ▁großartige +- ▁cape +- ▁successfully +- سى +- علم +- 撮 +- jah +- 安妮 +- ▁whit +- inin +- ▁Funk +- 他自己 +- ▁funding +- jär +- 女朋友 +- ホテル +- ▁poly 
+- ▁awkward +- iyorum +- wich +- lish +- ▁Jung +- cata +- جم +- ▁Then +- bby +- embe +- ▁Kein +- 防止 +- 访问 +- ▁blanket +- 鉄 +- ▁andrew +- ▁grounds +- ▁چا +- ▁Israel +- ▁pap +- 出てくる +- ▁convention +- றி +- えっ +- ▁Wieder +- 充满 +- ▁Australia +- fik +- аем +- ▁schlechte +- ▁Tier +- ▁febrer +- 屈 +- 肖 +- 課題 +- idas +- ▁milli +- ▁parish +- borough +- 進め +- ▁Zeug +- عب +- En +- ▁exceedingly +- 柔 +- 砂 +- 郑 +- ▁таксама +- leb +- 最低 +- 当て +- ▁japan +- 你把 +- ächtig +- sort +- ▁shudder +- ▁profitable +- 如果他们 +- ▁放一下 +- クロ +- ände +- 他们说 +- ▁Durant +- 很高兴 +- pare +- artig +- schlossen +- ▁ora +- ▁அந்த +- cà +- 掉了 +- fè +- ▁canal +- ▁administra +- ▁bears +- 出现在 +- ▁shriek +- ▁знал +- ebilir +- ▁schrieb +- contra +- ▁roar +- 間に +- 液 +- 相互 +- ▁package +- でお +- ▁Maar +- 식 +- けども +- ▁romantic +- ▁tales +- ▁gelernt +- ▁invent +- 大体 +- ▁anyhow +- を作る +- stoß +- ▁podcast +- ▁Пра +- ლი +- 奔 +- lp +- män +- кар +- mesi +- 的观点 +- gani +- Mail +- ▁mwa +- サッカー +- ▁alien +- tiu +- 是真的 +- 計画 +- りの +- 診 +- 医疗 +- 明确 +- ▁parece +- десят +- иш +- 哇 +- ▁meta +- CO +- lec +- ▁但是我们 +- argent +- 炸 +- 我先 +- ▁studying +- shaped +- 的样子 +- ▁anymore +- ▁attendant +- 可怕 +- ▁erfahren +- nade +- ▁zona +- ▁Dame +- ▁containing +- ▁Patienten +- ▁болон +- 統 +- ר +- ▁ჰო +- ▁соглас +- মে +- 目に +- ▁gyda +- ▁endeavour +- 那时 +- 発射 +- бур +- ▁cael +- จะ +- 言われて +- ▁meg +- 観光 +- ▁envelope +- ▁говорю +- ▁snap +- ▁truck +- 电子 +- ຊ +- ▁endeavor +- ▁stole +- ▁beheld +- 焼 +- 这也是 +- energie +- ▁acid +- ▁communicate +- ▁Хо +- 估计 +- 怎么会 +- зем +- මා +- ▁Australien +- fish +- 嫁 +- igheid +- odi +- 区别 +- нев +- liko +- tale +- ▁Camp +- 文学 +- krit +- hielt +- ▁tube +- ▁карт +- igne +- ді +- 我再 +- 的身体 +- obo +- 告诉我们 +- ▁dhateng +- ▁invention +- ări +- ▁அவர +- ▁Herrn +- าย +- ▁cunning +- folk +- をしている +- ▁Fra +- ▁ignor +- ▁Denken +- 付き +- ▁слуша +- 実験 +- patri +- ▁Ganz +- ▁kurze +- ▁saving +- 東北 +- 韦 +- るのか +- ▁baz +- ▁Freude +- ▁километр +- 責任 +- ▁restless +- ском +- ▁Ty +- ▁факт +- ▁whip +- 收集 +- となります +- スタン +- ▁folly +- ▁wool +- 
▁uses +- મ +- ▁Unsere +- 全員 +- стат +- ▁regards +- ▁Flu +- El +- ▁recommend +- るような +- tering +- ству +- ీ +- cope +- 每天都 +- harmoni +- ▁کرده +- ▁appointment +- 下雨 +- 广泛 +- ehr +- für +- 向き +- cell +- юр +- വ +- 设置 +- 这只是 +- ▁erzählt +- publik +- ▁nämlich +- 经理 +- ▁senate +- ▁perpetual +- 的研究 +- 昼 +- ற்று +- ▁Grand +- 和我们 +- ものです +- まって +- hra +- ▁laura +- 可能性がある +- ▁Cho +- ▁mama +- ▁Cam +- chair +- ▁chart +- ▁опять +- ▁injury +- scheid +- ambo +- ▁gospel +- projekt +- cı +- ▁elephant +- ▁наверное +- nisse +- ▁dislike +- ▁stomach +- ▁amendment +- 応援 +- ▁barrier +- ▁upset +- 独特 +- 白色 +- ▁dedi +- 無理 +- ▁extract +- 時間が +- 小说 +- ▁pains +- 就是你 +- 観測 +- ▁所以说 +- 粘 +- 選手は +- yin +- ▁этому +- ▁lass +- 湯 +- 指挥 +- ▁illumin +- rush +- 進んで +- দের +- élé +- ▁helps +- fia +- ▁Bildung +- ▁Jeder +- 亏 +- 億円 +- ご覧ください +- gress +- ▁romans +- ▁Haben +- トラ +- 国家的 +- mato +- hut +- 支え +- ▁tudi +- ▁disappear +- ына +- ▁Gesundheits +- ▁ነው +- ▁Beziehung +- cam +- ▁Drei +- 扣 +- ▁celebrated +- ▁coisa +- ▁convey +- ▁aussehen +- weis +- ▁Hon +- 止め +- 碗 +- ▁acquisitions +- 关闭 +- ▁Ба +- 超级 +- ▁tables +- ໃ +- ▁ndetse +- 一面 +- ▁واقع +- ีย +- ibility +- 載 +- ilor +- 信じ +- وف +- раг +- grade +- 上の +- ▁Viņš +- ӧ +- 手段 +- бил +- ▁damp +- ▁Alex +- ▁taxes +- ▁chez +- ▁50 +- ▁zumindest +- がいい +- 盤 +- brau +- 歴史 +- kopf +- ▁wordt +- ▁ເອີ +- 두 +- 차 +- เป็น +- 茨 +- ▁cable +- ▁grows +- ▁constitute +- ▁یې +- ▁interessante +- てしまう +- 備 +- ▁despre +- ▁promo +- 回目 +- 那个时候 +- ▁imbere +- 探索 +- phon +- 途中 +- ▁machines +- ▁Handel +- ▁salvation +- ▁parted +- ▁punto +- алі +- ▁moja +- 壮 +- halb +- ▁corporate +- 者的 +- ▁flank +- 勇 +- ▁relieved +- ますよ +- ена +- drückt +- Н +- ▁churches +- めちゃくちゃ +- 自民党 +- 进了 +- 仁 +- ▁swim +- ▁loĝ +- ▁الآن +- hnen +- 也不能 +- ▁eben +- とっても +- ټ +- ▁getroffen +- ▁stake +- kou +- ▁Fisch +- schap +- 太好了 +- zwi +- ▁vive +- 如果他 +- 最重要的 +- γ +- ますが +- 軍事 +- ▁woe +- ウェ +- 息子 +- ▁Steuer +- 肯定是 +- ▁tay +- ▁boss +- граф +- rons +- lasse +- 灭 +- க்கும் +- lini +- 体験 +- ▁delivery +- ▁temptation +- 忘记 +- 
ウル +- 奴 +- ▁thành +- ▁په +- ▁writers +- ▁assistant +- ▁Tam +- кө +- 我爸 +- ▁boom +- чный +- だね +- اپ +- weil +- uwa +- ▁facilities +- ▁monkey +- 私たちは +- صور +- ▁тому +- સ +- 真相 +- 这就 +- 削 +- ▁Sim +- ▁squire +- ▁britain +- 败 +- ۰ +- しかった +- ちなみに +- ستان +- ▁trifle +- dist +- ▁options +- empat +- 敲 +- ▁Monat +- 会说 +- стой +- しようと +- ▁langen +- ப்பி +- ▁Az +- 焦 +- ますので +- ছ +- sters +- 糟糕 +- erei +- ▁subtle +- 胎 +- に行く +- ▁яна +- ▁времени +- スタジオ +- ▁сообщ +- 普遍 +- yim +- pē +- ▁sitzen +- プーチン大統領 +- 你没有 +- — +- ▁Monate +- ▁dive +- ▁Fre +- 各地 +- ▁bias +- ml +- ▁bị +- ▁gardens +- ▁ceremony +- ▁roots +- غل +- geschichte +- ▁Ĉi +- ▁minu +- ▁имеет +- 来年 +- iaeth +- 小さな +- 饿 +- ▁можа +- ▁Hall +- 帽子 +- کش +- ▁pense +- ▁Tisch +- эконом +- ufu +- の世界 +- ▁Leistung +- ▁steer +- сю +- hul +- ーク +- ですので +- 爷爷 +- вон +- ▁wax +- 违 +- дь +- 一个月 +- ▁isabel +- 向你 +- ె +- lari +- ▁peril +- メッセージ +- すぎ +- 것 +- ▁общем +- ▁Ken +- ▁natives +- 邪 +- ▁Anda +- nice +- 结合 +- 把她 +- chang +- кро +- ▁Kunden +- 首都 +- 地点 +- ▁launched +- ▁cran +- вен +- фер +- 開始 +- ப்பா +- ▁Dum +- 的意思 +- ▁dragon +- ▁silently +- なければ +- 荣 +- ▁Junge +- ▁searching +- 这将 +- ▁erklären +- ▁scratch +- ▁ehrlich +- emba +- ▁Liste +- 応 +- 까지 +- ▁fitted +- とする +- GAAP +- ▁limits +- რი +- を使った +- 배 +- ovi +- eight +- раш +- ▁marine +- ▁Tar +- ▁Many +- 壊 +- 년 +- ▁troubles +- ▁nuclear +- ▁lodging +- خص +- ▁termina +- 多分 +- Weiß +- 有所 +- acak +- wise +- prof +- num +- 英语 +- abana +- 胜利 +- cada +- autobús +- ▁residence +- ولا +- ▁zeg +- ▁Jim +- 抵 +- ▁Bö +- ▁Hauptstadt +- 京都 +- iling +- 我把 +- ▁appetite +- ▁oameni +- ▁cardinal +- häng +- 地元 +- ▁doubtful +- reiz +- リスク +- 来て +- 慌 +- ғ +- しょう +- ▁bother +- 算是 +- ▁reverse +- ▁dearest +- ของ +- ▁занима +- ▁giả +- 不像 +- ▁newspapers +- ▁boil +- のうち +- μ +- ▁мужчин +- 一声 +- ār +- ▁fury +- かわいい +- ầ +- api +- ố +- ở +- ▁Aufmerksamkeit +- shore +- 堆 +- ▁центр +- 議論 +- ได้ +- ですし +- 如今 +- ▁passions +- ▁limbs +- ▁producer +- verein +- ▁مج +- ▁glen +- ▁natur +- rade +- ▁verrückt +- 版本 +- 回転 +- 
lica +- ▁hinder +- ▁hastened +- ▁anticipate +- 大事な +- の問題 +- ▁zwölf +- 先に +- ▁그니까 +- ▁erstaunlich +- ▁assault +- 发生在 +- ▁vede +- ▁bust +- ▁binnen +- ▁lloc +- は今 +- tré +- といった +- court +- ▁exquisite +- wunde +- ның +- ▁Lee +- ▁bundle +- duct +- 高速 +- ▁nas +- mac +- ▁достаточно +- 体育 +- チャー +- さあ +- ▁desires +- ▁Geschäfts +- gana +- 辛苦 +- 略 +- 交谈 +- 的那种 +- bí +- レベル +- ▁brass +- 说得 +- 무 +- dala +- 歯 +- ▁shalt +- kuba +- スタ +- ▁взгляд +- 对象 +- ▁clinical +- 目を +- 殖 +- industrie +- 戈 +- 的主要 +- ▁Sel +- ԥ +- enne +- ▁ralph +- ▁resting +- ась +- aran +- body +- ▁esteem +- ▁hail +- ඩ +- ▁вместе +- 仔细 +- ▁rip +- ▁Aquesta +- ▁Welche +- Bo +- されていた +- ▁multe +- 锅 +- 否 +- ▁наша +- ▁export +- ▁vigorous +- ▁Phil +- мын +- ▁scandal +- 炮 +- カレー +- ▁Gla +- ▁بازی +- تان +- ▁prevented +- ▁tranquil +- ▁tobacco +- ▁thence +- 집 +- ▁tire +- ▁schwarze +- リング +- ▁embrace +- ▁schu +- ほら +- ▁rural +- ҙы +- ▁skirt +- ▁каждый +- るという +- ▁Bitte +- 仍 +- ຄ +- ▁wrap +- vara +- ەوە +- 者は +- воз +- нал +- ▁offers +- έ +- 恐怖 +- 安心 +- ▁geweest +- ▁dumb +- ▁municipal +- фон +- 上げて +- tē +- 小组 +- ▁Carl +- ▁golf +- ▁Position +- ▁después +- 绑 +- '2019' +- ▁scientists +- ▁但是你 +- 경 +- ▁دیگر +- ▁excess +- লো +- erweise +- pence +- ▁prey +- 把它们 +- قة +- そこで +- ▁fünfzig +- пле +- 微笑 +- ▁cabinet +- ▁accused +- muzi +- 確保 +- ▁решил +- ▁Büro +- 的最 +- كر +- ▁formation +- ▁princes +- ですけれども +- still +- әр +- ▁баш +- ▁entertainment +- ິ +- 奉 +- '800' +- ▁собственно +- 放下 +- ということを +- bird +- システム +- ток +- ▁Tochter +- 被告 +- ▁uncertain +- ▁нельзя +- ▁شخص +- ほん +- arma +- ▁insight +- ▁ruler +- ▁altijd +- ▁habla +- ären +- ▁Here +- zunehmen +- ▁passionate +- ▁nunca +- ▁typically +- 童 +- 와 +- ▁supplies +- räum +- 温暖 +- anche +- ▁lustig +- stop +- ▁ascend +- かし +- ▁Stunde +- ▁начина +- وار +- றை +- ර් +- ▁marquis +- ▁Quin +- 伙伴 +- ▁إذا +- ▁Pop +- 惠 +- кую +- のない +- anne +- していきます +- 友達 +- ▁retire +- 来源 +- almente +- frau +- ▁ўжо +- mates +- няя +- нич +- ▁seda +- 消失 +- ısı +- ▁wrapped +- வில் +- ▁política +- 行われ +- 見えて 
+- ▁mau +- lager +- ▁při +- 上来 +- lett +- ލ +- dang +- ▁bez +- енный +- 哲学 +- ены +- جز +- ▁barrel +- 相反 +- ▁neighbourhood +- パス +- zeichne +- esten +- メダル +- ▁Kosten +- ▁relativ +- коп +- ▁älter +- 编辑 +- الي +- gründe +- голов +- 体を +- ஷ +- 有机会 +- ▁nobles +- 这件事情 +- が発生 +- ということなんです +- 告诉他 +- 往往 +- ▁دارم +- ũ +- yah +- 五个 +- న +- ▁argue +- ශ +- ピン +- imos +- ▁trusted +- ▁прямо +- рог +- ▁depuis +- idades +- Ч +- ▁যে +- ではありません +- ていて +- 億 +- ▁unpleasant +- smith +- わない +- ▁latest +- рг +- んじゃないかな +- nische +- ▁pulling +- を持つ +- puesto +- 乘 +- 酒店 +- ▁такого +- ▁Gelegenheit +- rats +- ▁svoj +- ▁viva +- 这就是为什么 +- 杂 +- ▁مورد +- পর +- ▁pushing +- ▁depths +- しよう +- ヨ +- Ne +- gles +- ▁guards +- 均 +- ▁hadden +- ▁maison +- ▁sketch +- вых +- 宝宝 +- 弁 +- ▁surgeon +- ▁aŭ +- ▁siempre +- sea +- 除非 +- ▁далее +- quí +- 感觉到 +- capacit +- ▁advantages +- ▁chant +- umwa +- bä +- 森林 +- ▁foul +- مار +- 矿 +- 跨 +- ▁staying +- 热情 +- мысл +- ▁paw +- ▁Straßen +- ▁forecast +- ▁собира +- اری +- 互 +- obli +- енные +- ப்பட +- ▁fum +- ▁cá +- ▁distinctly +- ада +- wari +- 飲 +- 一部分 +- 市内 +- 怎么能 +- gge +- ▁عنوان +- 出演 +- ▁sterben +- ▁reproduc +- ▁register +- ▁Mitte +- tiga +- bron +- ▁Software +- ▁knights +- たちの +- ▁место +- ▁menschliche +- ising +- ▁repent +- ▁mechanism +- taka +- ▁haut +- 需求 +- ▁stopping +- ợ +- ▁breathe +- 濃 +- 不容易 +- stick +- sagen +- ▁guarantee +- 这些东西 +- 詰め +- 缝 +- ▁sabi +- 所在 +- ランナー +- ▁представи +- ▁является +- 特别是 +- شي +- Đ +- ▁després +- 辺り +- 也要 +- ▁образом +- 听着 +- ▁earnestly +- ▁Tim +- воль +- 就没有 +- ост +- ▁oleh +- 何が +- ▁donne +- فة +- trägt +- ▁brains +- 幸运 +- ▁deadly +- станов +- valu +- ürü +- いえ +- ▁skull +- gebiet +- Mar +- 敵 +- 攻め +- ▁spielt +- source +- baren +- ▁Bevölkerung +- 都能 +- ▁stirred +- ▁commonly +- ▁pang +- ▁Before +- 角度 +- ▁tener +- ▁winning +- ▁öffentliche +- 截 +- نة +- ▁parce +- dler +- させた +- уль +- ▁Größe +- ▁nei +- ▁conservative +- ▁туда +- というもの +- 用の +- ▁boxes +- ▁reception +- ▁Şi +- ▁timber +- ▁Druck +- 発生 +- мир +- 適 +- 弃 +- ▁Sud +- 解決 +- 
க்கி +- ▁baga +- からも +- وح +- ▁comprend +- 在你的 +- 工人 +- 划 +- ложен +- 原则 +- 史上 +- klä +- ե +- markt +- 荡 +- kari +- walt +- オン +- ▁neues +- ▁Ergebnisse +- ▁courts +- нг +- 버 +- shot +- ең +- ▁blew +- haltung +- ▁Herzen +- грамм +- ▁participate +- ▁competi +- ちょ +- 알 +- ▁gerek +- ▁президент +- ▁esti +- ண்ண +- 공 +- 교 +- ▁fatigue +- ▁fatto +- ▁aquella +- altra +- ▁blossom +- 恵 +- 鮮 +- 瑟 +- ▁jewel +- ▁Rock +- 帝 +- ось +- ▁remainder +- ▁راه +- 特別 +- ▁technique +- ▁pillow +- opa +- 많 +- ▁category +- ▁таким +- ませ +- ▁monday +- それぞれ +- pio +- läuft +- ▁zunächst +- ▁Second +- volg +- motiv +- ▁nerves +- ▁programme +- omu +- tious +- ▁Bell +- ▁augen +- дон +- 一把 +- инский +- ▁ஆஹ் +- ▁omdat +- ▁dutch +- سته +- ▁profitability +- affe +- ▁standards +- 付近 +- 俄罗斯 +- kreis +- ▁gelang +- овых +- кор +- ▁procession +- ơ +- 将其 +- 他有 +- 爱的 +- 很有趣 +- ▁Omu +- кія +- ▁summit +- ▁fünfzehn +- چه +- 生活在 +- コー +- 情况下 +- ▁gesamten +- 海岸 +- 非常好 +- 家长 +- écri +- ▁murderer +- ▁Он +- ▁مست +- ▁accelerate +- itas +- cía +- ▁diejenigen +- ▁contracts +- ▁commun +- 高中 +- hte +- ▁delivering +- たっぷり +- kala +- ▁popula +- 公园 +- ▁domain +- ▁journalist +- ▁repar +- crit +- ための +- ▁Auswirkungen +- ▁былі +- ▁Papier +- 所以他们 +- lē +- பார் +- 外交 +- ▁Bundes +- 我们家 +- そんなに +- ▁stolen +- 低い +- 附 +- zaba +- иа +- ▁egyptian +- ା +- 달 +- 脏 +- ▁uwo +- lă +- 会不会 +- ▁Web +- ▁olduğu +- дор +- regul +- 時点で +- 必ず +- ță +- wissenschaftlich +- ▁scripture +- mper +- を示 +- ▁alterna +- kke +- حر +- shuri +- ▁format +- amerikanische +- ння +- ▁serving +- ▁doll +- ▁resemble +- ▁slain +- 走进 +- 美丽的 +- 展開 +- ▁wilderness +- ▁creo +- kind +- ▁Stein +- ▁mussten +- gramm +- ▁абсолютно +- ని +- film +- 不好意思 +- ▁sailor +- 我看到 +- 笑声 +- 嘅 +- 火车 +- 便宜 +- gid +- うまい +- 也就是说 +- 中継 +- ▁cats +- ango +- wehr +- чил +- ▁flashed +- 小孩 +- 으면 +- ▁occupy +- 不对 +- ▁victims +- rink +- ▁shan +- ▁dife +- ▁должно +- あんまり +- чай +- ▁وقتی +- 谈话 +- 珍 +- 庁 +- コース +- rechte +- ▁компани +- 后的 +- ▁fem +- ▁utili +- struktur +- 付出 +- mount +- ▁humour +- ții +- 
▁respectable +- を迎え +- бри +- ▁бог +- myaka +- mast +- ▁accomplish +- оро +- 什么意思 +- ▁allowing +- ▁streams +- 扫 +- mming +- ▁Pferd +- ▁aya +- ގެ +- ▁Canada +- chief +- ▁erhielt +- ▁scared +- ▁burnt +- ▁gloomy +- ▁выбор +- lha +- ▁さて +- нулся +- ▁Link +- ımı +- ベル +- ிருந்த +- 仪 +- 睡眠 +- ▁God +- ▁Vorteil +- ▁Stellen +- ístic +- を作り +- ▁Reich +- 重点 +- 氷 +- つい +- лийн +- 早速 +- ▁số +- ▁стол +- ▁Haar +- ゾ +- 説 +- ▁equity +- ▁grove +- ▁زندگی +- お客さん +- 选举 +- 妮 +- फ +- ывать +- 開かれ +- kamer +- хай +- ▁adjusted +- парт +- ▁insanlar +- 垃圾 +- ան +- だが +- 您可以 +- mena +- ▁suitable +- ދ +- fähig +- bikorwa +- 上升 +- ▁needle +- ▁hawk +- มี +- ベスト +- ▁prend +- ▁clima +- poko +- 地元の +- ▁jos +- قابل +- ▁prudent +- 什么是 +- ▁agony +- ▁inevitable +- losen +- ▁таких +- ▁wages +- ▁otros +- date +- 双方 +- 诊 +- ▁cluster +- 桂 +- 杉 +- インド +- ▁meaningful +- iet +- ▁fruits +- 継 +- ▁status +- 的男人 +- ▁trage +- ▁polly +- zehn +- でしたが +- ▁самое +- amour +- 和她 +- ufer +- ▁transparent +- rier +- tero +- スク +- を入れ +- ▁offence +- ▁இருந்த +- ライブ +- ▁washed +- ▁obscure +- esso +- 規模 +- ▁resident +- ▁resolve +- ▁betrachtet +- ▁foarte +- 幻 +- 正好 +- ▁damn +- zugehen +- 那儿 +- ஓ +- წ +- tian +- 層 +- ▁assembled +- 中国人 +- ▁chairs +- 夸 +- ▁сур +- 解除 +- ▁Dit +- 終わり +- To +- ▁menyang +- tanz +- ▁officials +- ▁libro +- ▁Glauben +- ibu +- ▁prim +- гы +- ▁Most +- 构建 +- ▁entweder +- ▁bauen +- 是关于 +- 動物 +- ▁Mund +- ambu +- かけて +- rop +- ▁generations +- ▁Joan +- ▁dash +- ▁masses +- ▁mögen +- بات +- oud +- 込んだ +- ▁brows +- 演奏 +- 一気に +- 防衛 +- cula +- ▁Interesse +- еры +- 方針 +- 和他的 +- mog +- ຈ +- 輝 +- ▁disguise +- ▁varia +- ▁leaped +- नि +- ▁leer +- шь +- zieren +- ļa +- ına +- rique +- 彼女は +- ளை +- 雑 +- amendement +- 这本书 +- ▁confined +- 不久 +- 将在 +- ▁nephew +- imit +- бан +- 缘 +- gado +- 我们今天 +- ereza +- 墓 +- ▁domina +- 买了 +- 体制 +- 稳 +- صر +- 一万 +- 競 +- 浴 +- 的结果 +- 時から +- पा +- ▁hopefully +- ▁còn +- 什么都 +- ▁poe +- ▁fastened +- ▁brands +- ▁ấy +- 烂 +- ▁florence +- ▁legislation +- gari +- ▁высо +- barra +- ▁Plu +- aciones +- 
dna +- 一首 +- 賀 +- ▁december +- ▁valid +- cine +- ▁erinnert +- pole +- familie +- 琳 +- strat +- ▁humans +- gā +- ▁kinder +- ▁scri +- ampl +- ▁blast +- 琴 +- 曹 +- ▁dispar +- 绿色 +- 挣 +- ▁продолжа +- кру +- ▁steamer +- ▁arguments +- ▁toda +- を超える +- ▁kullan +- tli +- ▁Zustand +- ▁suspicious +- 脉 +- ▁obstacle +- 競技 +- 椅子 +- ▁Maß +- ▁temporary +- 的任何 +- ▁persuaded +- 如果你想 +- 逐 +- ▁cambia +- ウン +- ό +- ▁abbiamo +- ▁vary +- ▁понятно +- ぐらいの +- ものの +- 辈 +- ▁commenced +- ▁Schrei +- ligt +- 主题 +- 都知道 +- schule +- ▁separa +- web +- ▁Kaj +- 飞行 +- thon +- ▁nnyo +- ump +- ▁Service +- ▁irgendeine +- eus +- 件事 +- 这么大 +- 的老 +- ▁protected +- ▁effectively +- 分かります +- dür +- 主要是 +- кус +- ▁enorme +- ष +- エネルギー +- ▁sunset +- нэ +- wah +- ▁призна +- кино +- ▁deinen +- ▁costume +- くない +- کو +- ▁homo +- ▁کردن +- tò +- ума +- もありました +- いきましょう +- ▁seats +- ▁Dort +- ▁mars +- ▁supporting +- ▁replaced +- ▁hammer +- ▁Two +- ▁interven +- sley +- ▁машин +- 把这 +- ▁genauso +- مۇ +- нская +- ▁furnish +- ▁units +- 的爱 +- ▁strangely +- 먹 +- steuer +- 受伤 +- 関係者 +- 是这样的 +- 失去了 +- ▁اینجا +- 分け +- ທ +- ▁globe +- 市場 +- 播客 +- иг +- 资本 +- ▁contents +- hanga +- zicht +- ▁großes +- 还会 +- ▁jove +- кер +- ▁saints +- 丰富 +- ▁Vol +- ▁fazer +- ▁mou +- دون +- ▁Ntabwo +- ▁Pass +- ▁fires +- ▁devices +- ▁tienen +- нымі +- ▁mademoiselle +- ガス +- taba +- ▁achter +- 鬼子 +- 还真 +- 買い +- ▁rays +- ▁Cent +- ▁buryo +- ▁cual +- ▁amigo +- ▁murdered +- んでいる +- ▁vulgar +- elte +- ▁seas +- ▁либо +- пуска +- ▁aceste +- 申请 +- そば +- ▁Brit +- ▁bitten +- 的历史 +- ▁Natürlich +- ▁Texas +- 炭 +- ▁Schwester +- ▁блок +- ▁neighbors +- ாள் +- 一時 +- ▁двух +- rita +- ▁effet +- ▁часть +- מ +- ▁speziell +- ▁групп +- われ +- 亲自 +- جی +- ashobora +- ▁armies +- tuk +- ▁succession +- 上昇 +- ▁Minister +- 一些人 +- ৃ +- 究 +- ▁wing +- いう +- 的呀 +- anthrop +- の一部 +- ▁fountain +- 兼 +- ▁minut +- りとか +- 初の +- 把他们 +- ว่า +- ▁Design +- ▁familia +- 神秘 +- lui +- ▁testimony +- 教えて +- ▁abruptly +- ▁Une +- ▁decidi +- riet +- 类型的 +- ▁meetings +- 받 +- 월 +- рай +- space +- iadau +- 
▁pall +- ▁antwoordde +- 職員 +- مز +- каў +- think +- ▁insbesondere +- ▁solitary +- ヨーロッパ +- ▁responded +- 中国的 +- ▁puzzled +- 逆に +- ▁Mul +- 赢得 +- గ +- сөн +- 不是我 +- ▁sauce +- ついて +- ▁diferente +- life +- 民主党 +- ▁pronounced +- までに +- 公民 +- ▁hohe +- ▁diverse +- ▁namely +- ▁weigh +- pian +- 这张 +- ▁thereby +- வீ +- ▁tragedy +- が出て +- ▁但是他 +- 谷歌 +- іцца +- ök +- nika +- ▁chứ +- 剪 +- ▁offices +- 你们俩 +- ▁channels +- zette +- sozi +- ▁twi +- 公園 +- gize +- rata +- 養 +- ▁scott +- ▁grip +- ▁longing +- ▁texas +- ▁diamond +- 事業 +- 这场 +- ▁Kä +- bou +- 想过 +- ுள்ள +- 一遍 +- 尝 +- schutz +- ▁clara +- ▁уу +- вяр +- 善良 +- ▁memories +- üyor +- ▁producing +- ▁sadly +- ▁wink +- 生存 +- 现在的 +- 协议 +- ▁scent +- ىنى +- hine +- ▁veces +- ▁pří +- ▁Ng +- 购 +- ▁Werk +- ▁cog +- 行政 +- ワールドカップ +- ▁Schmerz +- ▁milit +- liği +- ▁neglect +- pier +- 黑色 +- ▁என் +- ▁functions +- ▁harvest +- 시간 +- ▁floating +- が今 +- ▁metro +- lingen +- яць +- ▁partially +- ▁indication +- ▁Rei +- スタッフ +- ▁Sub +- 予報 +- ▁guilt +- gesehen +- 的学生 +- ▁sergeant +- mari +- ады +- Wa +- 平等 +- 房间里 +- 않 +- ▁documents +- ▁cavalry +- ▁பல +- fashioned +- ▁sets +- бой +- ▁nehme +- த்தின் +- ▁پیش +- क् +- 广播 +- ае +- 动作 +- ▁integration +- 石头 +- ▁результат +- ▁amas +- ▁brig +- দে +- 饼 +- ▁fiction +- 題 +- ▁кстати +- ▁shu +- ▁பிர +- しまって +- flex +- eorang +- made +- рал +- shop +- 領 +- scheiden +- ĉa +- 旁 +- 舍 +- ї +- 不是说 +- truc +- schiff +- ▁Scott +- ія +- ▁roses +- ▁但是在 +- 胆 +- ▁Ра +- داری +- 我们想 +- Ab +- ▁trabajo +- に関する +- ▁defect +- ▁Gil +- ერ +- ▁wandered +- 会談 +- ckle +- 勝負 +- Au +- meli +- ▁bose +- 飲み +- šo +- 植 +- ▁애들 +- 所说 +- vian +- ▁لقد +- 作って +- ▁satisfy +- ffle +- ▁Arzt +- ▁wann +- ▁deeds +- ▁myth +- ▁Nummer +- ▁criticism +- ▁Kro +- 故意 +- 单位 +- ▁맞아 +- ستا +- ▁guardian +- ▁indignation +- Ver +- ▁recollection +- 会見 +- ▁whoever +- break +- 気温が +- সি +- ▁gleichzeitig +- 陛下 +- ▁delicious +- 开始了 +- 读者 +- beeld +- 起了 +- ▁illegal +- 地面 +- ▁yeni +- ▁enjoyment +- ▁outrage +- 并在 +- ▁flee +- 屏幕 +- больш +- ▁interact +- ▁momentum +- ▁hano +- 
▁inspiration +- liches +- ලා +- iser +- ▁glasses +- 加上 +- 暑 +- орон +- 耐 +- ▁dramatic +- ▁равно +- hla +- gó +- 协 +- 障碍 +- 倉 +- ▁sunk +- ▁шестнадцать +- ▁tendency +- ▁Island +- するなど +- 预测 +- よい +- ▁unbedingt +- ▁bleibt +- ▁шмат +- ▁disp +- angira +- arca +- ▁crimes +- onne +- ▁loyal +- ▁apostle +- ▁betray +- оны +- ▁inward +- ilia +- 烦 +- 的日子 +- ▁사람 +- lg +- 見る +- rinner +- 沉默 +- ▁seize +- ஞ்ச +- iler +- ▁правильно +- なあ +- ▁بسیار +- 媳妇 +- ▁говоря +- 主意 +- 물 +- ▁ruhig +- ▁Turn +- 不出 +- сом +- ▁Nel +- 紫 +- unddreißig +- を務め +- unta +- ▁Autor +- ▁diana +- ▁alexander +- ▁telegraph +- ▁ساخت +- нен +- ▁amen +- 焦虑 +- ▁Artikel +- аар +- حة +- ▁seves +- ▁пост +- һә +- 天下 +- Un +- 明显 +- ▁لدي +- лаг +- ▁Rand +- ▁personality +- hart +- abandi +- っていました +- zek +- ▁arrangements +- بد +- ▁sermon +- が多く +- 的照片 +- ▁ruined +- ahu +- なと +- 智慧 +- ัง +- ▁теле +- tou +- ▁geheel +- ению +- ▁своих +- ▁virtues +- ▁widely +- ▁mingled +- ▁Kir +- 我们没有 +- ccio +- ▁Club +- 我们不 +- езд +- してた +- ▁rider +- teilung +- كو +- 一片 +- ▁женщин +- سان +- 重复 +- 生日 +- ▁approved +- 绕 +- 機能 +- ishi +- ograph +- 讲述 +- 危机 +- 広がって +- 获得了 +- رە +- 意见 +- 妇 +- ▁loaded +- cation +- いいですか +- ▁fran +- услов +- ▁pastor +- 统 +- 提供了 +- 再開 +- ▁puff +- ն +- 障害 +- ▁financi +- 给大家 +- kund +- ▁tune +- arro +- ▁없어 +- ▁levi +- lade +- 今も +- ▁yea +- ▁serpent +- ▁endless +- いいですね +- いき +- ▁Where +- ▁Beste +- riz +- 离开了 +- 径 +- ränk +- ному +- 伸び +- ▁Fähigkeit +- ▁Schlaf +- ▁duchess +- 你和 +- ▁لأن +- 就这么 +- ▁executed +- ▁heels +- ▁chicago +- 성 +- ▁vader +- zahlen +- ▁periods +- called +- just +- ▁тринадцать +- さい +- dron +- 改变了 +- रि +- ▁vera +- ▁sharing +- inta +- ▁Feld +- 負 +- ▁ihe +- кров +- dog +- ▁чт +- pool +- ▁dreamed +- ▁heilig +- ▁slender +- 在家里 +- ▁uncon +- മ +- ▁그리고 +- ▁Kandi +- Union +- 英里 +- ▁chances +- ryo +- 场景 +- 遥 +- 链接 +- 移民 +- 十四 +- ▁sac +- fact +- ▁selten +- ▁Arab +- 谈到 +- ▁feather +- ▁inch +- 尽可能 +- 都被 +- 日常 +- ▁attorney +- human +- ▁helpful +- 劳动 +- 经营 +- ▁mă +- ▁Cro +- ▁languages +- ▁Esperanto +- лев +- 
▁sustainable +- タン +- 生産 +- 纵 +- うちの +- ▁promotion +- lateral +- stamm +- making +- 華 +- ▁risen +- geist +- ▁vehicle +- 大量的 +- сна +- ▁careless +- ваю +- 村さん +- 一刻 +- 育て +- ▁Ча +- 取り組み +- ▁passengers +- 雇 +- း +- ▁chimney +- ▁엄청 +- ▁abundant +- ใน +- 调整 +- 場で +- moc +- ▁sao +- ▁Generation +- 男女 +- ▁provin +- ▁funktionieren +- ▁limp +- ▁calmly +- gesellschaft +- ▁Ram +- ▁quello +- ▁tv +- ▁agents +- 、4 +- あら +- 和他们 +- ▁беларус +- ▁Bezirk +- どっち +- ▁villages +- ▁intervals +- ▁cooking +- ▁printed +- ▁revelation +- ▁Gene +- 也就 +- ǧ +- 今シーズン +- 女王 +- ▁mwen +- 我们对 +- стер +- 上で +- 链 +- ▁politique +- ▁sunlight +- ▁differently +- god +- 보고 +- 暑さ +- 对她 +- ▁dost +- 图像 +- ▁poden +- igno +- çu +- ▁holes +- かれ +- 多久 +- ▁consumers +- ▁sweat +- bac +- 喝酒 +- ▁کم +- ▁movies +- ▁одной +- τ +- テン +- arian +- ▁verdad +- قول +- 司机 +- 承担 +- ▁exceed +- низ +- 今日も +- 措施 +- ▁clan +- uso +- ▁Last +- 回应 +- ▁nowhere +- ência +- ▁islam +- ɣa +- gende +- بان +- 報 +- quir +- ▁Lehr +- Ц +- ▁nerve +- ▁january +- 批判 +- 很大的 +- 男朋友 +- 通信 +- کت +- 栏 +- いきました +- ▁путин +- 町の +- 混合 +- ▁absorbed +- wami +- もともと +- èn +- 有趣 +- ▁hinunter +- state +- ▁fathers +- ▁sweep +- 情報を +- ▁alcohol +- bonye +- ggy +- රි +- fitting +- isierung +- aq +- ▁exposure +- 紹介 +- iker +- indu +- ▁friday +- цыя +- brü +- ▁faculty +- ▁convict +- ▁tad +- 誰か +- نظر +- 新型コロナ +- 업 +- ို +- 遺体 +- ▁addressing +- 什么的 +- たって +- ▁veu +- ▁presents +- ▁depression +- ハイ +- ▁Suche +- 财产 +- ▁Ту +- 前半 +- ▁joan +- を引き +- стрел +- 田中 +- ▁user +- ▁constitu +- ▁Publikum +- ▁Tour +- ەکە +- 機関 +- ▁muốn +- овал +- ▁moonlight +- ▁welke +- まだまだ +- ▁которой +- ▁participa +- 彻底 +- ▁discussions +- ▁году +- idée +- 醉 +- 出して +- ▁Nachrichten +- fordert +- ▁proces +- 高さ +- ▁Esto +- 예 +- 切って +- ▁Inhalt +- ▁fortunately +- ево +- ndre +- ▁قو +- arı +- 你都 +- فه +- 披露 +- market +- ▁сами +- tano +- 了一下 +- 平衡 +- '150' +- gekommen +- 多大 +- ተ +- ▁lieber +- zas +- ▁demands +- 多少钱 +- べき +- 飛び +- шә +- ▁stare +- 他们有 +- abantu +- 하지 +- ▁hı +- 魂 +- ▁supplied +- 豚 +- шен +- 
▁preparation +- bing +- ▁heavenly +- 規 +- ▁nhất +- selt +- ▁გა +- ▁gig +- ▁можем +- 銃 +- ضا +- ▁raus +- ▁Ergebnis +- ▁forests +- ▁erstellen +- ▁Fle +- ஸ +- ▁behaviour +- 年轻的 +- lder +- энд +- тел +- யான +- ▁Please +- ▁travelling +- 多么 +- volution +- 더라고 +- ▁größere +- を変え +- ▁procedure +- 遭 +- ▁Material +- ▁campus +- を行って +- 級 +- руу +- roch +- ▁Mä +- 現在の +- 你已经 +- чын +- ▁erwartet +- ▁requirements +- 就不会 +- 専門 +- médi +- 转向 +- Lachen +- 未来的 +- ▁commune +- ▁Fluss +- 孝 +- tega +- ▁Zimmer +- 我认为这 +- ▁Cra +- レース +- 状况 +- ▁جدا +- 一大 +- 不是你 +- зор +- ▁choses +- ▁Tie +- ▁representative +- ▁pent +- ▁knot +- 有点像 +- െ +- ▁slim +- ▁Sho +- っか +- чных +- 法案 +- ța +- ▁barbari +- ▁Sto +- ▁promptly +- камі +- изм +- oog +- ▁verme +- ▁adorn +- やめ +- ▁commence +- ▁Hör +- schritt +- emt +- ところに +- ▁vz +- ▁тип +- 债 +- леч +- がん +- ▁добра +- abend +- excel +- ▁aceast +- のもの +- 你真的 +- ▁invented +- 矛盾 +- ▁neutral +- と思うんですけど +- erna +- rf +- zima +- 规模 +- ワー +- 喺 +- ▁currency +- ▁eagle +- lom +- magn +- ▁claimed +- ▁pockets +- 점 +- etto +- 的目标 +- ▁হয় +- ▁blade +- ▁laŭ +- евская +- 重要な +- dī +- ▁кабинет +- ▁zéro +- ▁francs +- 大多数人 +- вил +- ▁identified +- 你可 +- ▁logic +- 你给我 +- تس +- axe +- ▁achievement +- ▁contribute +- ▁Ober +- 反応 +- 宝贝 +- もあり +- ▁itu +- 基地 +- ▁admiral +- nydd +- shyira +- 中学 +- drin +- teen +- こんなに +- رات +- 脸上 +- ში +- ▁colleagues +- 同情 +- ▁random +- 覆 +- 挤 +- 声が +- ▁bewegt +- 结束了 +- 安静 +- 영 +- ▁weiteren +- ▁okul +- ▁начальник +- erfolg +- ▁grava +- ▁pove +- ▁Pod +- 的数据 +- ▁typical +- クリ +- schaffen +- 也好 +- ▁wrought +- জন +- kommt +- だったり +- dust +- terra +- 講 +- ▁bushes +- ▁muß +- ユ +- ▁хэл +- ▁waist +- 我一 +- 跌 +- 工业 +- ▁dave +- ئو +- 回头 +- ▁Radio +- '600' +- ▁confirmed +- ということが +- 外国 +- 来讲 +- ža +- chal +- ▁mouse +- 仲間 +- ▁alert +- ላ +- ▁Council +- 肌 +- ▁beginnt +- 铺 +- ▁Day +- ▁hired +- 全国の +- だい +- 伟 +- ук +- ▁condemned +- いよいよ +- ▁stap +- ▁música +- ithi +- ▁conform +- performing +- ▁incredibly +- ▁cresc +- ▁lingvo +- 扭 +- 胃 +- ▁thumb +- ▁honestly +- isk +- 
ffel +- ▁specimen +- glass +- はこちら +- ▁spark +- munt +- 西方 +- ▁grupo +- 你不会 +- ▁söyle +- histoire +- વ +- 随便 +- rama +- ಕ +- なきゃいけない +- ▁circuit +- の声 +- ▁Rue +- 练习 +- ▁prayed +- 更大的 +- 納 +- برا +- ほかの +- 회 +- ▁entdeckt +- ▁команд +- фор +- 这也 +- ▁nasıl +- nehmer +- 相关的 +- 原谅 +- рыв +- 送到 +- ▁deutsche +- IS +- ▁purse +- 上有 +- ▁soziale +- ▁rhe +- 代码 +- ует +- 輪 +- ▁Hallo +- 比我 +- iest +- かかって +- ▁tid +- posició +- ▁daring +- してくれ +- sprach +- 東部 +- 毎 +- ▁agency +- poj +- 知って +- gamba +- ▁carr +- 指导 +- ▁yeux +- 飛 +- ▁followers +- 贾 +- ▁obedience +- ▁spät +- ▁mischief +- 報道 +- 川さん +- action +- 迎 +- ▁resort +- ▁deals +- ▁warned +- 摔 +- 丑 +- ▁Einzel +- லாம் +- ほどの +- ▁mob +- ああ +- もんね +- jed +- 하면 +- precia +- 百万 +- ▁represents +- российск +- ▁entitled +- ▁derived +- ▁lace +- ▁resource +- ▁certainty +- ▁gott +- 又是 +- ▁gallop +- ジュ +- iwa +- 電気 +- 这就是我 +- руч +- ▁dolan +- npr +- Il +- ▁coarse +- ▁процесс +- みます +- ▁tenderness +- ▁charges +- 野さん +- ▁emerge +- umber +- 喊道 +- 检 +- ▁satu +- ▁sahen +- є +- ▁notwendig +- தோ +- ▁flames +- 士兵 +- ▁Seele +- خبر +- ▁Nhưng +- ▁från +- ▁کښې +- ▁아니야 +- ▁affectionate +- ▁curtain +- ▁detailed +- やら +- 我叫 +- 一系列 +- ▁пара +- oce +- ▁exempl +- 范围 +- がいる +- sions +- ▁artists +- ària +- 凭 +- ▁divorce +- ▁хоёр +- ▁corridor +- ▁lofty +- 最高気温 +- roma +- 陷入 +- 開け +- ▁nivel +- 集团 +- waka +- ב +- ▁Feind +- 损失 +- ▁орган +- mite +- polo +- 内の +- ▁Zugang +- 使って +- ▁fein +- ▁numéro +- ▁allí +- ற்ப +- ▁pressing +- afrika +- ষ +- dores +- 可以看到 +- ▁roughly +- ▁стара +- 技能 +- ▁savings +- ▁farmers +- ▁theater +- glück +- magnet +- ▁când +- ▁barely +- ▁briefly +- вялі +- போது +- ôn +- 児 +- احت +- と呼ばれる +- cloth +- ▁aka +- 和一个 +- ▁painter +- ▁challenging +- 初めての +- ▁borrow +- 完美 +- ▁pillar +- ί +- ரோ +- ▁Frei +- 军事 +- ▁advertising +- ▁solitude +- ことができる +- やってる +- 力が +- ▁bargain +- のではないか +- 不少 +- ジャンプ +- ბა +- ビル +- жо +- ▁sees +- يح +- ▁normalerweise +- bih +- ▁před +- ▁telefon +- ▁injured +- ▁marsh +- uburyo +- ▁правда +- 就把 +- ▁газ +- 瞧 +- ▁moves +- के 
+- ▁writes +- ▁advocate +- なんですよね +- ▁sebagai +- ▁wherefore +- ▁hoop +- maga +- ▁sphere +- 来月 +- guin +- 咬 +- ▁underlying +- ▁olduğunu +- にいる +- 主动 +- ▁plusieurs +- みんなで +- ً +- nwa +- 燃 +- lijke +- 私人 +- ▁Studie +- 例子 +- ূ +- 牢 +- ▁happily +- 따 +- ciò +- ▁идет +- ▁парти +- を入れて +- ▁ranks +- īt +- ctic +- 就是这样 +- ▁blows +- 有用 +- ▁colon +- 吵 +- 找你 +- ▁pensar +- кім +- 彼は +- 迹 +- ▁organized +- 柄 +- дог +- ▁countess +- niu +- liest +- ▁construct +- ో +- ▁newly +- ▁Kontrolle +- ▁சிற +- ▁nahe +- ▁тү +- стей +- 指摘 +- ▁announcement +- ▁tenant +- ▁mexico +- おはようございます +- vuit +- 突破 +- ▁видел +- terie +- ▁energ +- houd +- 봤 +- ▁harbor +- տ +- ▁verlieren +- avaient +- aquest +- ▁Dal +- 优势 +- ▁deja +- ember +- 讨 +- 竟然 +- ▁slecht +- initiative +- ehrt +- るんです +- ativ +- ▁Studien +- letter +- 观点 +- 到底是 +- ▁stond +- ▁penetrate +- ▁dividend +- ▁investors +- 警方 +- ▁иван +- 面白い +- っちゃった +- ▁বল +- ▁revolver +- ateur +- руш +- misch +- ▁vengeance +- シュ +- quatre +- ▁wheat +- shaw +- ▁nawe +- trop +- cret +- があると +- 指出 +- рә +- ▁simon +- 创作 +- ▁thither +- ивать +- ▁beschäftigt +- ▁brethren +- igita +- ▁Nie +- 这个词 +- ▁Brown +- قط +- ribu +- ▁bitterly +- ▁sofa +- ▁component +- ▁ventured +- これだけ +- bora +- 很久 +- 았 +- ▁Religion +- 缺乏 +- ▁Pin +- 咱俩 +- ▁panic +- 很明显 +- zorg +- 的父亲 +- ▁survive +- 一辈子 +- ▁gravely +- ▁Tagen +- ▁counted +- ▁positively +- 召 +- дым +- ූ +- ▁dacă +- 関連 +- ினார் +- 沿着 +- ▁bisher +- はありません +- だろ +- ▁elf +- ް +- 共产党 +- ▁clutch +- anten +- 咲 +- ▁casi +- یل +- ▁conversion +- 知道的 +- ▁warten +- ьте +- ▁پای +- feuer +- 瓜 +- ▁بۆ +- ▁tenir +- ▁Ну +- 不明白 +- ▁napoleon +- の映像 +- 两种 +- 自从 +- 幸せ +- hib +- jwe +- ▁podemos +- ▁pirate +- ▁bedeuten +- лова +- 我得 +- 切れ +- orden +- つ目 +- ▁weeping +- をつけて +- ছি +- 衰 +- ▁интересно +- ▁tradu +- ▁сме +- 互动 +- ស +- 행 +- がい +- ▁versuchte +- 恒 +- 르 +- ▁Anzahl +- 彼ら +- ▁practices +- ▁мед +- menti +- ▁weiterhin +- ▁restore +- ologia +- ▁computers +- ▁algun +- 用来 +- 干净 +- school +- аза +- ▁vivo +- ▁Grundlage +- ukan +- なくても +- ▁estado +- 
▁attempts +- ▁Ĉu +- ▁sample +- bund +- ▁analog +- ▁celle +- ▁salva +- ▁providence +- すばらしい +- ▁House +- ▁trim +- ▁erkannt +- ▁rushing +- 旋 +- ゼ +- ▁bulk +- はおよそ +- 世界中 +- ▁findet +- cultura +- زو +- ▁instances +- зан +- ගේ +- であり +- енных +- ▁santa +- ▁jewish +- gele +- ▁gusta +- ▁condemn +- '5000' +- 司法 +- führer +- ▁sais +- үл +- 東京オリンピック +- に入り +- 诚 +- ධ +- ▁nouvelle +- ▁invite +- ovat +- ▁treaty +- ▁navy +- urteil +- minate +- іх +- 襲 +- ▁mexican +- ggle +- اي +- ▁germans +- ▁aqui +- 在我们的 +- ▁canvas +- ▁selection +- ijo +- 送り +- ▁blaze +- lığı +- ▁gesund +- gora +- ▁آب +- ▁Who +- 知り +- nız +- ▁ôl +- ▁suck +- ▁displayed +- えば +- ▁undoubtedly +- 的过程 +- طة +- ▁Así +- ▁warrant +- ▁ursprünglich +- ▁کردم +- пен +- 婴儿 +- 蛇 +- ▁هغه +- ▁illustrate +- 事実 +- ▁bureau +- fata +- なか +- ▁Richter +- mbwa +- 费用 +- 那就 +- ▁Teile +- ▁Daniel +- ▁nên +- ▁Ад +- ▁deploy +- ký +- Č +- 純 +- ▁entscheiden +- ▁چیز +- ▁Reaktion +- ▁можете +- währ +- アイ +- က +- ▁acquire +- лив +- ▁caesar +- 有事 +- ské +- ▁doth +- ▁tête +- 蓝色 +- ▁conversations +- ▁wept +- 入れる +- ▁politischen +- ▁jerk +- ▁elaborate +- 然后你 +- ▁весь +- ▁ladder +- に行って +- 分开 +- ▁advancing +- macht +- ▁které +- 電力 +- Wi +- дыр +- ▁hamwe +- とう +- ▁lasted +- タイミング +- ▁afite +- 报纸 +- 事態 +- ▁Ker +- 漂 +- ▁employer +- ▁discourse +- ▁cannon +- ▁Kommission +- yat +- shim +- ▁которое +- valo +- даў +- istoj +- breng +- ▁prevail +- ニング +- ▁julie +- nine +- ljen +- вым +- ▁pension +- 縮 +- ▁сум +- ▁kleines +- ▁publish +- lden +- ▁alex +- ▁niece +- 閉 +- に来て +- ▁simplicity +- もらった +- 吃的 +- ▁idol +- ester +- ਰ +- はね +- ээс +- 针对 +- 阳光 +- たく +- 書き +- ▁brood +- ୍ +- が続く +- craft +- 专门 +- 調整 +- んですけども +- 批评 +- ▁dependent +- 沟通 +- 全面 +- 两千 +- 掌握 +- ▁других +- kere +- 으니까 +- 基金 +- iska +- ▁kang +- empresa +- ▁daniel +- ▁четыреста +- 着你 +- 반 +- utilitza +- 会被 +- ▁riu +- 主張 +- نية +- philosoph +- ▁munsi +- ▁помог +- طل +- ▁measured +- 稍微 +- ▁وقت +- 第四 +- ▁flows +- ▁hogy +- 都很 +- ▁Act +- までは +- 朋友们 +- ▁trials +- ▁schlug +- 符合 +- 目標 +- ▁shower +- シーン +- 
チョ +- нные +- 分手 +- ▁kent +- ެއް +- ින් +- ▁владимир +- ▁Angelegenheit +- ▁ascertain +- ▁gaat +- ▁الث +- oja +- ▁birthday +- 続いては +- ▁robin +- 緊張 +- ▁Fol +- ▁сделал +- ▁amused +- plex +- 刑事 +- 我还是 +- 储 +- ▁Zahlen +- やす +- ▁frozen +- ごろ +- このまま +- ▁sailors +- ▁önce +- lite +- ▁eerste +- 补充 +- 走向 +- ▁». +- 現地 +- 愚蠢 +- Lo +- gă +- artagnan +- ▁suite +- іў +- 有两个 +- 机器人 +- ▁relate +- ▁fisher +- 财富 +- ▁vanity +- 尊 +- 凶 +- ɛa +- 谋 +- umba +- きています +- ▁Wild +- ▁magistrate +- ▁مرد +- Ich +- مب +- appel +- 姿勢 +- 疲 +- પ +- yez +- 工程师 +- 乎 +- მი +- ▁exemple +- ▁Even +- ▁ample +- 混ぜ +- ▁Poli +- ▁Ari +- 皇帝 +- ▁schr +- ▁picking +- 庆 +- ▁quella +- يز +- 保護 +- 結局 +- блі +- ▁Cur +- 窗户 +- ическая +- মান +- '31' +- 你来 +- ▁семнадцать +- ▁scout +- ▁Lebensmittel +- 收听 +- Qu +- 上次 +- قوم +- கால +- جميع +- wür +- ▁Good +- 建设 +- 世界の +- δ +- ▁raison +- ▁Mike +- нный +- шо +- idio +- るため +- ვა +- fällt +- ▁declined +- geza +- 如果您 +- 大声 +- すべて +- いるのは +- 赢 +- ▁obeyed +- ▁depending +- ▁colony +- 側に +- стью +- ក +- 워 +- osten +- ▁flourish +- 是如此 +- を奪 +- ▁كنت +- ▁swing +- 脾气 +- ▁garments +- 前进 +- brid +- ▁Și +- ▁sarah +- 鼻 +- ▁Ny +- Ja +- ▁swallow +- ▁entreat +- pran +- ▁angesehen +- 警告 +- ▁experiments +- 坏了 +- 援 +- kunst +- ▁correspond +- ▁быстро +- 首相 +- ziert +- ですからね +- ▁judges +- ▁passe +- ▁frontier +- 玩儿 +- ▁байх +- の影響 +- gericht +- zeichen +- bari +- 两位 +- ▁colours +- 唯一的 +- 実現 +- 鳴 +- 寻求 +- 評価 +- ▁impre +- ▁aspects +- ▁zeigte +- ▁compte +- ▁secrets +- ması +- 句话 +- ifik +- ň +- 策略 +- ▁Henry +- 一口 +- ▁Бо +- ប +- ං +- ▁chính +- gier +- বার +- нию +- ケース +- ▁fanny +- ▁curs +- 烤 +- wedd +- bian +- ▁twist +- ▁mère +- ▁почти +- сим +- 日本は +- ▁idiot +- ▁hoog +- лиз +- ▁comprehend +- ▁Wald +- 可怜的 +- 引用 +- 统治 +- тик +- ▁들어 +- ▁lawn +- きっかけ +- práv +- らし +- なと思って +- ▁ceiling +- ijwe +- ▁그럼 +- ▁susan +- 側の +- のところ +- ▁проект +- だという +- 汇 +- ▁Freiheit +- 좋 +- ▁تت +- ▁trembled +- ▁Bil +- 祈祷 +- 樹 +- estima +- cze +- ▁goat +- ße +- ことです +- ▁sai +- 俊 +- ▁сюда +- ▁Rose +- ▁install +- 威尔 +- bett +- 
▁Indian +- ▁luxury +- 聖 +- 芝 +- 投稿 +- ▁Dun +- ▁повер +- などと +- dec +- ▁emma +- ▁uncertainties +- lok +- っしゃ +- ▁signor +- 频道 +- 딱 +- ▁ئۆ +- ▁servir +- 復 +- 仅 +- ብ +- 委員会 +- agira +- ாய் +- 我们能 +- ▁ministers +- を加え +- 使用的 +- paga +- ▁urge +- ▁phenomena +- 翼 +- ahan +- 指導 +- bruch +- 标志 +- abord +- osos +- ▁essence +- ए +- atori +- ▁dried +- fica +- ▁ratio +- いますが +- ▁Lang +- 半年 +- 裁判 +- ▁drops +- ▁вось +- ▁Änderung +- 芬 +- ▁Beginn +- ▁emily +- ▁дур +- ▁investing +- ديد +- әк +- 운 +- ▁skills +- ▁potentially +- ▁herunter +- リード +- ▁revolt +- ▁soit +- ▁Bor +- メン +- 权利 +- ▁vertical +- ▁believing +- хі +- 自治体 +- ▁считаю +- 金メダル +- ▁groan +- ることができ +- 演出 +- களுக்கு +- үн +- rog +- kubwa +- веч +- ifer +- ▁emphasis +- 卓 +- getragen +- ▁ليس +- ▁pacific +- なくなる +- ▁bisa +- 扇 +- ▁Jen +- ▁страны +- ▁alarmed +- 署 +- ▁Sorgen +- 係 +- ▁كما +- ▁باشد +- ▁wheels +- 最後 +- 共和党 +- ▁tho +- ива +- 鼠 +- 半分 +- yum +- 分かりました +- ugi +- はその +- フレ +- ▁September +- ▁вечер +- と言って +- takse +- ▁voller +- šu +- éra +- 涉 +- banga +- ▁bass +- ▁Diskussion +- eres +- station +- ▁forming +- ▁attacks +- ▁renew +- ▁centro +- ggs +- ყ +- ▁diminish +- ▁faded +- ▁instruction +- ルド +- сал +- ▁wach +- ▁sozialen +- 的部分 +- 増加 +- 相似 +- 净 +- 泣 +- 在这种情况下 +- 味道 +- bourg +- を確認 +- 叫我 +- ▁steve +- 师父 +- kumi +- ▁Meer +- 一代 +- ▁Städte +- ▁erwarten +- 小子 +- ▁folded +- ▁Mut +- fähr +- اخ +- ▁sleeve +- ড +- ▁subsequent +- ▁bessere +- トロ +- rong +- ▁лично +- сту +- ест +- ▁weer +- ▁giới +- ▁назва +- ▁sara +- ▁acknowledged +- bier +- ▁Boston +- ▁مد +- 孤 +- 姨 +- ▁Risiko +- غان +- 犯人 +- ▁transmit +- ▁regularly +- ▁власть +- 做到这一点 +- 確かに +- спор +- ▁finde +- грам +- 可以说 +- ▁highway +- 非洲 +- ▁crush +- ▁tett +- 横浜 +- 月份 +- ▁gelesen +- なくなった +- ▁whale +- fällig +- ▁ripe +- ▁wherein +- 이랑 +- ダメ +- 勉強 +- ▁nang +- ▁Mitglied +- ▁famille +- 繰り返し +- カード +- 智能 +- dain +- ехал +- 罗马 +- 複 +- 訳 +- '99' +- 实在 +- 楽しい +- 이야 +- 発言 +- 命运 +- aktiv +- ▁dense +- ▁Up +- ▁reside +- 大家好 +- ▁dug +- ▁genetic +- 方の +- の間 +- ▁primitive +- ▁condu +- 听听 +- 
▁dropping +- 進出 +- या +- まい +- тек +- ▁lowest +- ▁хүн +- ▁Kann +- ▁trova +- 妇女 +- шир +- ▁sandy +- ▁Això +- ▁которую +- ▁gossip +- ▁Ме +- 随时 +- ▁adventures +- ▁jsme +- 对手 +- 特征 +- ミス +- ▁repose +- ▁Gruppen +- ▁jackson +- ▁cage +- ▁Gewalt +- ▁Armee +- систем +- ▁rigid +- ంట +- ▁trink +- 大卫 +- မ +- ▁tumble +- сә +- 到现在 +- 我们如何 +- ▁summon +- ▁lên +- gesetz +- クション +- そこから +- サイ +- 관 +- ▁그랬 +- 帮忙 +- ▁lantern +- ▁Klein +- 果たして +- fusion +- hild +- wirtschaftliche +- 去世 +- цо +- present +- ▁users +- ▁pelo +- сид +- ▁sağ +- ▁Schuld +- 老爷 +- 出席 +- ▁license +- 臨 +- ину +- ▁neglected +- ▁Stu +- දි +- ▁maka +- ▁Zusammen +- ▁harp +- mill +- 是从 +- скай +- 彼得 +- 度过 +- schte +- 爽 +- 漫画 +- 杀死 +- 保留 +- عرف +- ▁emergency +- 他已经 +- ▁plead +- するという +- ▁destiny +- 解放 +- 午 +- まさか +- そんなこと +- 滞 +- pell +- ▁presume +- ތ +- ▁Chicago +- ический +- ▁crois +- ▁plastic +- linda +- ▁consist +- ▁jolly +- uke +- 董事会 +- يى +- ▁dedicated +- ຂ +- ▁проста +- 体の +- ფ +- 交代 +- いただく +- ▁genoeg +- 边缘 +- ▁ninth +- ▁flushed +- 他想 +- ▁impatient +- hst +- гын +- ▁agua +- ▁basin +- ▁prosperity +- 今朝 +- posto +- 罚 +- ▁lease +- ические +- ▁idee +- ▁shouting +- ▁soixante +- ▁oogen +- リスト +- ▁frog +- 两次 +- ونه +- 区の +- trouw +- ▁consisted +- 郊 +- ▁Ibi +- ▁Standard +- ▁partir +- 声明 +- čč +- ▁cordial +- ▁brute +- ▁rond +- ほしい +- 几个月 +- ▁denkt +- 消费 +- road +- kül +- ደ +- א +- ▁знаешь +- ▁ruins +- زان +- ▁segments +- lise +- あなたの +- 着他 +- 还得 +- 思った +- 俺は +- ▁incorpora +- ウィ +- ▁принима +- 其中一个 +- ▁induced +- ▁lively +- кән +- ▁recruit +- ▁viola +- 雨が +- ▁động +- ▁peep +- fter +- ▁Sommer +- 是很 +- を探 +- 让自己 +- ▁occasional +- лах +- ▁выход +- kud +- స్ +- ▁верх +- ▁nouveau +- ▁amerikanischen +- ▁ross +- ▁взял +- ▁questioned +- ▁Begriff +- ▁anfangen +- imwe +- вис +- ▁eby +- ▁irregular +- inya +- ▁April +- мах +- 物を +- hafte +- 男が +- ▁sites +- 事を +- ▁Jahres +- ▁realm +- 纹 +- ▁erinnere +- kosten +- 成熟 +- 仗 +- ِي +- 捨て +- մ +- ತ +- 입 +- はまだ +- ▁luego +- ▁dunkle +- 三百 +- ▁Pot +- کاری +- ▁deaf +- 物の +- 其实是 +- iam +- 
▁impacted +- цый +- 生徒 +- ▁thankful +- melde +- '36' +- ▁situated +- 食材 +- barkeit +- рем +- 给你们 +- 为此 +- 他妈 +- ču +- 冲突 +- ▁confessed +- альный +- 墨 +- ▁친구 +- 固定 +- 主义者 +- ▁spy +- 驾驶 +- ises +- ▁exit +- lep +- しまった +- ர்கள் +- ▁discern +- ▁preparations +- 群体 +- ▁menschlichen +- zieht +- ▁institute +- 其他的 +- ▁neighbor +- gte +- 他们可以 +- hängen +- гаар +- رک +- zier +- 呐 +- পা +- ▁jewels +- 誰も +- jj +- ▁terme +- жив +- 眉 +- ansah +- 带来了 +- iumenge +- handlung +- 电台 +- 少ない +- ▁Farbe +- 浪费 +- ▁Jugend +- ▁чуть +- 行為 +- ард +- neuf +- ガン +- lch +- mö +- ukuri +- ▁gewinnen +- 得了 +- remo +- 弥 +- ▁tại +- ▁Mai +- 当局 +- link +- 黄金 +- тели +- 的家庭 +- ▁suburb +- ▁captive +- ▁summoned +- 堪 +- 葬 +- 不说 +- ▁bump +- ▁camel +- まとめ +- ▁checked +- 一回 +- ത +- イル +- ▁ebenfalls +- gust +- gic +- ▁luna +- ▁austria +- ▁exhibit +- ög +- ▁francisco +- únic +- seven +- オミクロン株 +- 过去了 +- 石油 +- racy +- 飯 +- ▁Wil +- ▁lado +- ▁flies +- беж +- ỏ +- ▁شب +- idir +- ▁variation +- 聞く +- ▁doit +- 平静 +- ▁Hälfte +- ▁aujourd +- woman +- ▁мои +- ▁stran +- ▁convince +- ▁prendre +- lī +- ▁geschickt +- ▁perfume +- 头上 +- ▁конце +- 里克 +- ▁turk +- ▁divert +- 没人 +- حي +- 郭 +- ▁benutzt +- ですかね +- nier +- Rh +- 适应 +- ▁trebuie +- ▁сай +- 前から +- ▁pledge +- が必要です +- 冷静 +- ▁fist +- 咯 +- ▁Gefahr +- 懸念 +- ▁брат +- 予選 +- cock +- つけて +- fond +- ▁кому +- ▁cô +- 为自己 +- ▁täglich +- チェック +- 强大 +- ела +- さが +- uzu +- 拉斯 +- ▁banda +- 遍 +- ▁Länder +- ັນ +- களின் +- ▁guessed +- ソース +- hoff +- 掛け +- 在哪儿 +- වෙ +- 拔 +- ▁threshold +- ▁advised +- ▁Behandlung +- тора +- ▁chaque +- ▁varied +- kay +- カン +- lā +- ▁jerry +- kunft +- ип +- ▁publication +- ▁Gründe +- 艺术家 +- لە +- ▁Vergangenheit +- 冇 +- 深い +- ốc +- ▁pueden +- ▁sentiments +- 西日本 +- ▁readers +- ▁Element +- ▁төр +- ▁gebruik +- lisa +- ▁factory +- ▁kau +- ▁Mitglieder +- 将是 +- ▁вдруг +- 工場 +- 风格 +- ации +- ▁interna +- ▁recalled +- ▁scream +- twitter +- 解説 +- xes +- 専 +- ▁Stern +- ▁няма +- ▁visitar +- ▁owners +- 0,000 +- ▁gefragt +- さすが +- ▁خواهم +- ▁shoe +- ▁dashed +- ▁zelfs +- bola 
+- ӑ +- ▁Schlüssel +- ວ່າ +- ĝe +- 高级 +- ▁persuade +- думал +- ということですね +- ▁celui +- 聞 +- री +- 他人 +- ▁Ini +- নের +- ▁pře +- ▁estan +- ▁recognised +- 伟大 +- からです +- 知道你 +- ▁reeds +- கின்றன +- 陣 +- zugeben +- ▁downward +- 総理 +- ▁plen +- 商量 +- 杆 +- ▁nosotros +- ▁vater +- ▁Ва +- ▁Ell +- 唱歌 +- ▁права +- 学者 +- ▁recollect +- ▁representation +- ▁Gefängnis +- િ +- ▁Erinnerung +- 他被 +- ▁арт +- ẽ +- ać +- ▁moss +- luft +- 置いて +- ▁Akt +- سف +- 心脏 +- ▁babies +- tekereza +- 食事 +- කා +- торы +- блюд +- のみ +- 分から +- رى +- ▁drain +- aidd +- ▁Abantu +- ģi +- 行った +- 詳しい +- ▁opge +- ▁deiner +- メニュー +- mula +- 得意 +- ▁struggled +- 窓 +- 偶 +- ▁Besuch +- ங்கு +- ▁enfants +- ▁Bern +- ▁perish +- 千葉県 +- ▁від +- 困惑 +- ▁آخر +- ▁Fue +- க்கா +- できて +- kond +- ▁სა +- ▁christianity +- くれ +- ▁nka +- ڪ +- ▁segon +- lique +- 强大的 +- っぱ +- ▁gains +- 那时候 +- 카 +- '&' +- ▁rechts +- TV +- 日本代表 +- 他们没有 +- lice +- ▁наших +- ▁Beruf +- 兆 +- лес +- ▁shrink +- ▁Albert +- ayi +- ▁forbid +- ▁miteinander +- 奸 +- 靴 +- uloj +- 州的 +- ▁ashes +- クト +- дов +- ▁refresh +- тат +- ▁freundlich +- lista +- ▁rings +- ▁fancied +- aktion +- ▁diseases +- ▁bekam +- 東京の +- ▁quien +- 上下 +- 我又 +- しょ +- ▁Як +- ▁biblioteca +- ▁jemandem +- 事物 +- ckey +- суд +- ▁overhead +- どうも +- جمع +- ▁Parti +- ▁pill +- قدم +- ▁Insel +- るから +- ▁sobald +- ▁Kap +- උ +- ▁aga +- ▁என்ற +- ▁Ни +- いている +- 险 +- ▁Berlin +- ▁victoria +- rato +- simil +- сек +- ▁categories +- ▁Quelle +- ▁Lake +- seb +- dama +- ▁восемнадцать +- antoj +- が高い +- ▁лишь +- ▁año +- bring +- ▁그렇 +- нні +- ▁дня +- ற் +- 当時の +- ▁renewed +- corn +- ▁goals +- 小学 +- 两天 +- ▁Gesetze +- ▁estamos +- どうやって +- ▁livre +- 案子 +- landa +- провод +- ម +- 別に +- ▁красно +- 七十 +- ごはん +- атор +- ▁yonder +- 听众 +- acre +- ▁uncomfortable +- sätze +- ▁programa +- bê +- ▁observations +- ▁muse +- кли +- 大幅 +- 他把 +- ▁beef +- ঠ +- ▁dialogue +- ▁marvel +- 请你 +- ▁awakened +- ▁Iran +- automat +- ▁чалавек +- வில்லை +- 实施 +- ▁contributed +- ▁richtige +- 它们的 +- みました +- ▁сказала +- ▁кем +- ▁escrit +- ▁hierher +- 他现在 +- 
▁indifference +- யே +- 亡くなった +- 方面的 +- 数は +- 川の +- ಮ +- ▁Objekt +- シーズン +- ▁teatre +- 制定 +- 钢 +- ▁Ressourcen +- 箭 +- ▁augment +- 担任 +- 丰 +- ▁escort +- iris +- 月から +- ница +- ▁wenige +- пут +- ız +- ▁footsteps +- ▁Umwelt +- شى +- ▁mixture +- 呼びかけ +- 南方 +- ▁erg +- 岸田総理 +- ି +- cem +- ▁tests +- неш +- mura +- gré +- ▁менее +- há +- ▁pepper +- ங்கி +- 代の +- ▁разговор +- yamba +- 等你 +- ▁indignant +- 했어 +- ▁энэ +- ▁contented +- ▁speci +- ▁Staats +- ție +- その時 +- ුව +- ▁alliance +- 上学 +- おっ +- 前線 +- చ +- ▁schönen +- 分かった +- ация +- 緑 +- 勇敢 +- ▁thoughtful +- жан +- ▁trouve +- сно +- ▁Rechts +- ର +- ▁february +- 一辆 +- 举行 +- ères +- ▁minha +- toxic +- 牵 +- 释放 +- ▁gallery +- кажу +- ▁bestand +- ▁fils +- ▁Charakter +- ▁Sinne +- sache +- ▁Gast +- 在这些 +- ▁sper +- ヌ +- ái +- ▁Wood +- 六十 +- ▁simul +- しかない +- 仮 +- 医師 +- ▁Florida +- ▁distributed +- ▁flour +- oxid +- ன்ன +- の部分 +- 出る +- ▁saat +- ▁giá +- ▁reporter +- 就没 +- ▁amy +- ▁profund +- ▁International +- ▁framework +- ვე +- zir +- ложил +- 加速 +- しまいました +- ▁Ты +- lion +- 伪 +- 杀人 +- 准确 +- 値上げ +- ▁Organ +- することで +- ▁funciona +- нак +- どうしても +- 夕方 +- ▁вс +- 为我们 +- ▁gezeigt +- ponent +- ワクチン接種 +- ▁velvet +- ▁attraction +- と思いますね +- 巻 +- 購入 +- ▁Spaß +- ▁här +- ▁Baum +- 必要な +- ▁instruct +- ▁lordship +- 知らない +- ▁arbeitet +- 稍 +- 怖い +- ▁Ehre +- 실 +- 覆盖 +- ▁ладно +- ▁absorb +- ▁tread +- 踏み +- cate +- 誘 +- 空港 +- ▁patriot +- িয়া +- trust +- ▁Beth +- ▁transactions +- ▁tạo +- firma +- ▁Gö +- 实践 +- teg +- とり +- ▁claro +- もので +- かというと +- 打了 +- 我需要 +- ▁admired +- ▁bronze +- duction +- Bu +- ▁одного +- yla +- Ş +- 的女儿 +- جن +- ▁стала +- ▁conceived +- цей +- ▁akan +- biologi +- ▁palabra +- ▁stolz +- デン +- wärts +- ▁jsou +- ▁twilight +- ▁Brazil +- ▁verbracht +- reka +- ▁понимаете +- つける +- ppi +- ▁calcul +- general +- ▁igual +- ▁wealthy +- usia +- оо +- ▁harmony +- ▁Code +- 協 +- ▁принят +- 强调 +- пала +- ▁symptoms +- ▁trading +- ▁chef +- ▁answering +- 降低 +- 著 +- ▁expedi +- 这个地方 +- ▁swimming +- ▁froh +- ▁roused +- 車が +- フォ +- ▁chatter +- brun +- 
Univers +- ▁earliest +- hita +- ▁max +- ▁awoke +- 農 +- ladi +- ▁scor +- 垂 +- 教练 +- vollen +- ▁уверен +- 儿啊 +- pfel +- Ko +- ▁bate +- 慎 +- 夫婦 +- ▁tunnel +- ▁visto +- あん +- ▁uncertainty +- 有限 +- ▁conquer +- ▁decades +- ▁Lü +- ▁Mach +- ▁Sehen +- чыць +- 差し +- ▁warriors +- なさい +- 四年 +- под +- illon +- 狠 +- 感激 +- vik +- ▁decade +- ▁lime +- 的确 +- ▁Elizabeth +- 선 +- ボー +- ▁plunged +- 实验室 +- ▁Nachricht +- ühr +- 泊 +- ▁Viņa +- ▁managing +- ▁seul +- heden +- ▁Höhe +- ▁robber +- ▁testament +- bereich +- medic +- ipun +- トラック +- cott +- antic +- ބ +- ▁собой +- 发挥 +- мін +- ▁Mond +- ▁carro +- どうした +- aco +- ていきます +- ▁Després +- 凝 +- 计算机 +- ▁Podcast +- ▁получа +- ▁fais +- ▁mould +- ▁полу +- паль +- 想像 +- ▁Schutz +- step +- 相比 +- いち +- けん +- ną +- బ +- ーター +- ▁principi +- 二零 +- mail +- ▁hire +- ▁legte +- ▁degli +- ▁плохо +- کا +- 会長 +- ランキング +- хүү +- 盐 +- ▁limb +- ▁mineral +- 最喜欢的 +- 取った +- ▁proven +- mack +- ▁Themen +- ▁democrat +- 才会 +- 通報 +- new +- ▁satan +- ▁yaptı +- ▁jene +- koll +- შ +- ▁retro +- ▁عم +- '2015' +- 払 +- ▁Ces +- ▁innocence +- 間違い +- ▁array +- ▁yid +- ▁déjà +- ▁şekilde +- 始まった +- あなたは +- ▁electro +- 邮 +- 文明 +- 入院 +- داد +- 当她 +- ▁upright +- ▁jours +- ポン +- ▁Cel +- すこと +- ▁ekonomi +- ▁suicide +- きっと +- كت +- ▁corruption +- ▁tue +- ▁Karte +- 危機 +- мас +- ▁الل +- っこ +- ▁apprehension +- 磁 +- worthy +- ▁constrain +- lwa +- ▁jacket +- problem +- ▁anxiously +- ▁hohen +- ▁কর +- angi +- ▁subscribe +- ▁проблема +- ▁цяпер +- 篇 +- 好看 +- ▁lösen +- ▁parson +- গা +- ש +- 廊 +- 天然 +- ▁своего +- 联盟 +- üü +- ▁cob +- 赖 +- ▁Sä +- ▁rattle +- тин +- おく +- ޭ +- 华盛顿 +- messen +- ▁göster +- ▁muchos +- зон +- 拆 +- ▁tracks +- 不确定 +- ▁المن +- ランド +- ▁niba +- ไป +- ▁resulted +- 这个国家 +- もあった +- leta +- ▁multaj +- ▁classic +- 梨 +- ކު +- empre +- վ +- ▁matthew +- ▁strategies +- ▁Rad +- セン +- ねぇ +- 屋さん +- 旨 +- ▁amazement +- ږ +- ▁같이 +- mbang +- ▁Max +- 曜 +- ▁terribly +- schütt +- ▁адзін +- と発表しました +- ▁Gottes +- 拿到 +- 構 +- クイズ +- ▁hideous +- ▁مهم +- ▁каза +- 타 +- tzt +- ▁çalış +- 来吧 +- 平和 +- 
▁twe +- ▁mule +- クリーム +- 成绩 +- ▁vrij +- というのも +- ▁weißen +- ▁cathedral +- ▁viņš +- ▁pobre +- 构 +- てくれ +- issement +- শি +- ▁tenth +- ▁pretended +- 朵 +- дт +- ▁thief +- ▁planted +- ▁persist +- 喷 +- ▁долго +- ▁zufällig +- ddu +- 的精神 +- たった +- анд +- 钻 +- 郡 +- ▁завтра +- ▁eminent +- ▁losses +- ▁drie +- 棋 +- annya +- 楚 +- ▁причин +- ▁flick +- ▁bekommt +- ▁pode +- ▁unterstützt +- ▁Tiere +- 的内容 +- が出る +- ▁какое +- ▁Ball +- するために +- ▁mwaka +- 回目の +- ▁visits +- hne +- ▁australian +- ▁oldest +- ▁variable +- ssystem +- ▁rely +- ▁стоит +- 客人 +- altro +- 亡くな +- ▁dispers +- 被害者 +- bber +- ▁Kerl +- 裤 +- 等我 +- ▁Mikro +- ▁Barr +- овый +- ▁occup +- ች +- くり +- 我确实 +- ையும் +- ▁situas +- 洁 +- finanz +- ▁gewe +- 徳 +- داخل +- ▁threatening +- 失望 +- ▁Beweise +- ろうと +- ▁holl +- 准备好 +- ▁bells +- 啲 +- ▁shakespeare +- 拳 +- скую +- kasi +- 推动 +- ▁Schlag +- ým +- ▁oncle +- ▁dorthin +- ▁assert +- ಲ +- 培训 +- ▁unwilling +- 位の +- ▁bills +- ▁drivers +- ▁instru +- 弟弟 +- 各国 +- tip +- ▁avail +- kade +- 瞎 +- 公子 +- 历史上 +- エリア +- ▁tierra +- ▁старо +- 皆 +- ▁headquarters +- 翻译 +- 組織 +- ▁Feder +- ood +- экс +- ▁videos +- 为我 +- ception +- 官员 +- 審 +- ▁자기 +- ▁Kollegen +- imbu +- nywa +- ▁raven +- ▁sultan +- ffy +- guha +- 阻 +- шым +- рек +- ▁Chan +- 夏天 +- 対戦 +- ▁derzeit +- けば +- 自分たち +- ▁einzigen +- '2020' +- 籍 +- ▁pluck +- ▁Allgemeinen +- ▁Einfluss +- 为什么不 +- ▁environmental +- сць +- ▁separation +- siniz +- ▁Fal +- 娶 +- ▁මේ +- ▁induce +- ▁ebenso +- ▁donner +- ▁снова +- orde +- 打席 +- 概 +- 收拾 +- ▁Finger +- ▁Schwarz +- やすく +- ▁linen +- ▁filling +- 贡献 +- 震惊 +- ▁Präsidenten +- ▁proceeding +- 地图 +- champ +- issabte +- ▁быць +- 带走 +- зав +- ▁kämpfen +- 捜索 +- ▁policies +- 演技 +- лап +- 思いました +- ▁egy +- ▁плат +- 分配 +- 驱 +- 耳朵 +- 降る +- ▁sally +- ▁gotta +- 氏は +- どういうこと +- عتقد +- 繁 +- バッター +- ▁decree +- をかけて +- ▁deinem +- ▁beggar +- ジョン +- tino +- ▁gehören +- hwa +- 授 +- łu +- ▁strongest +- ▁recommendation +- ▁nchi +- zil +- 了个 +- ▁prepara +- 电子邮件 +- 知事 +- ▁trabaja +- arak +- 覚えて +- 前回 +- ▁habitual +- cky +- 耶 +- ▁செய்த 
+- cina +- łem +- ▁pencil +- ▁ĝin +- そもそも +- 였 +- ▁triumphant +- tiği +- ▁visiting +- ▁recording +- 变得更 +- 牺牲 +- ▁hesitation +- ▁erschien +- 千万 +- kende +- rrington +- ùng +- ▁conquest +- ▁openly +- ▁sali +- ▁herauszufinden +- ▁особенно +- бод +- せい +- ▁sulla +- ņu +- 弯 +- ▁tug +- ▁européenne +- ▁gewisse +- čin +- ением +- てた +- ▁promises +- дат +- ırı +- ▁buiten +- ızı +- ▁inflict +- mädchen +- ▁ermöglicht +- ▁maintenance +- ▁curiously +- خواه +- ▁hình +- ▁третья +- ▁кир +- 战略 +- лон +- ▁rocky +- 来週 +- straße +- 问他 +- ▁mächtig +- 炉 +- 纪念 +- ́ +- ホントに +- antwoord +- 完成了 +- コーナー +- ▁musician +- ▁Chinese +- キュ +- 信心 +- ▁mansion +- 奏 +- ąc +- 娱乐 +- ▁extension +- ▁decay +- ▁arbeitete +- ▁dood +- 很好的 +- やん +- ▁hoy +- 근 +- ▁anguish +- 僕が +- が見 +- ▁electricity +- espera +- ▁cove +- ▁feathers +- lif +- 说的话 +- なります +- ▁hilft +- entes +- 有问题 +- ▁reuni +- 安慰 +- ирует +- diplom +- ję +- deki +- anja +- 找不到 +- цвет +- brand +- 闘 +- 有着 +- 居民 +- නා +- したもの +- troph +- ▁purely +- ▁troop +- ずつ +- рост +- ▁nicholas +- шей +- ▁moeder +- ショ +- ▁Sonnen +- ▁aucun +- してくれる +- 物語 +- 乾 +- முறை +- ▁landlord +- ▁interval +- 一套 +- unternehmen +- 怨 +- 宏 +- 部长 +- ▁менән +- ▁lagi +- ▁historian +- ්‍ර +- ▁erect +- ▁waved +- кнул +- ▁plains +- мал +- rce +- tuvo +- 残って +- 泳 +- 图书馆 +- ▁inclination +- kamp +- ▁такі +- 出发 +- にくい +- ▁verde +- 盯着 +- ▁jimmy +- ▁messages +- ▁liebt +- 点儿 +- ▁lifting +- пры +- ▁productivity +- پل +- креп +- ▁witnesses +- яз +- るんですけど +- 师傅 +- année +- ▁Mehr +- ▁equivalent +- ▁regi +- ën +- زي +- てくれる +- 突き +- ▁wütend +- ▁translation +- 回忆 +- ▁هنا +- 導 +- ▁partit +- ▁maintenant +- cela +- ▁außerhalb +- のことを +- ▁turkey +- ▁які +- 有任何 +- ▁disciples +- 終 +- なと思います +- 梯 +- kole +- ▁Künstler +- ▁konzentrieren +- ▁crimson +- енко +- 私たちの +- ▁Sonne +- 符 +- ▁garrison +- യ +- ▁furious +- 金属 +- 出于 +- ssé +- ▁durchgeführt +- ▁victor +- dele +- ▁packed +- 漂亮的 +- ▁madre +- aggio +- үүр +- ついた +- 在外面 +- 长得 +- ▁cần +- ▁blowing +- දු +- ▁Mari +- ▁strictly +- ющих +- lessness +- ▁Prä +- ajo 
+- 资料 +- ▁Lord +- くれて +- 肚子 +- ▁wounds +- ▁nếu +- チーズ +- carna +- ホール +- 들이 +- ску +- だんだん +- 克里斯 +- ▁бүр +- ▁demonstrate +- haga +- 寻 +- 追加 +- рей +- 头脑 +- 立つ +- 芳 +- जा +- ▁Oku +- 西部 +- 期望 +- ▁saber +- れている +- 柏 +- ▁девятнадцать +- 거 +- инг +- うちに +- ▁nhận +- ▁sommes +- なのに +- ▁betrayed +- Ç +- つもり +- るんですよ +- त् +- ▁herein +- りたい +- nium +- 人気の +- 现象 +- 抬 +- 的公司 +- ნა +- hali +- ▁admirable +- 髪 +- 这里有 +- ▁ultimate +- ▁devas +- つけた +- 募 +- ▁warmth +- ▁reject +- 贸易 +- 警視庁 +- 簡単に +- 感受 +- ▁organizations +- ▁realmente +- ▁schwarzen +- ▁Glo +- '75' +- ▁unjust +- 居然 +- 的例子 +- ▁Jungen +- ▁Què +- ования +- ▁retra +- 一些东西 +- ishwa +- ▁dingen +- ▁nime +- ▁què +- цов +- 取って +- komeza +- 躺在 +- ▁gravity +- ▁Таму +- ▁englishman +- ▁artistic +- ▁housing +- 在于 +- ▁allan +- 温柔 +- amerika +- 的时刻 +- fahrt +- ▁schätze +- ▁Opfer +- 伦敦 +- ▁greeted +- ▁ranch +- ▁tú +- ▁exile +- 小时候 +- 董 +- ▁illusion +- ▁Finanz +- ▁willst +- ▁Come +- ▁geheim +- ▁weep +- ▁Waffen +- bó +- vā +- 昏 +- ▁underneath +- dığı +- ▁Jane +- ▁вопросы +- ▁dopo +- 杰克 +- 主義 +- gesprochen +- ▁groote +- ▁история +- 受け入れ +- 負担 +- ▁масс +- awo +- ▁cứ +- ▁advise +- 涨 +- 笑顔 +- plu +- ▁আর +- 兽 +- ▁stroll +- 哪些 +- ▁conceive +- なった +- ▁nachdenken +- ▁precede +- 先輩 +- graben +- ▁Einer +- ▁Boot +- バック +- ілі +- дова +- скі +- 无论是 +- eira +- ▁comedy +- 你得 +- ▁Però +- 入れて +- роў +- ▁narra +- ography +- 味が +- 帰って +- 拍摄 +- 评估 +- ▁inquire +- 类型 +- گان +- ▁bestehen +- ▁nacional +- ▁fremd +- 天空 +- ▁opponent +- gebildet +- ものは +- escent +- ▁blown +- ▁unterschiedlich +- ▁substitute +- 我只 +- ▁withdrew +- ▁Kri +- 込め +- ▁From +- ▁explosion +- fragen +- ▁exclude +- 女性が +- papier +- ▁heiße +- ▁praktisch +- ▁mga +- ▁foster +- 滋 +- ▁Earth +- ▁troviĝas +- 教室 +- 到时候 +- けが +- ▁frightful +- ▁nationale +- 在那个 +- ланд +- ▁firing +- ора +- たん +- 存在的 +- 镜 +- trakt +- 術 +- اث +- ▁那我们 +- ▁scarlet +- ▁exhaust +- хар +- ▁circum +- ▁ancora +- 皮肤 +- 替え +- 充满了 +- づけ +- 放心吧 +- 你看看 +- ▁ажил +- ▁yourselves +- 串 +- 做一些 +- ▁enlighten +- تاب +- ▁vierzig +- 统计 +- 
▁harris +- kanye +- ثر +- 申 +- ▁moan +- ▁impressive +- ▁Tele +- кур +- цыі +- 导演 +- 你说的 +- いなかった +- ставить +- ▁blend +- ▁wretch +- ▁descent +- seid +- ▁stages +- 剤 +- ▁정도 +- 我知道你 +- ▁Holz +- ▁boven +- ルール +- ▁هە +- ▁adjust +- 続けて +- 海上 +- ▁Argument +- chau +- 勇气 +- 洪 +- 見ると +- ▁motionless +- が必要 +- ▁Schulen +- ▁Spitze +- ▁tenia +- 明け +- 遅 +- 投手 +- chain +- 跟踪 +- ▁Zeiten +- ▁mọi +- 待って +- haupt +- daj +- 的妻子 +- ▁urgent +- tack +- ▁publi +- 桶 +- 盆 +- 夹 +- ▁crushed +- これまでに +- ▁oriental +- てくる +- ▁reverence +- 在过去的 +- ▁heroes +- arlo +- ▁toilet +- しゃべ +- クラス +- ünü +- paro +- することが +- mela +- ▁jealousy +- fant +- 挡 +- ▁significance +- 系列 +- 灾 +- ▁gym +- завод +- 気に +- ▁tipp +- ▁barbara +- ▁Actualment +- ▁Today +- 態 +- ющие +- ▁discussing +- 能源 +- ▁granda +- ▁спросил +- onic +- ือ +- ▁Neben +- さない +- アー +- ▁아이 +- ▁hizo +- いたします +- 外国人 +- มา +- klop +- 反映 +- wirken +- すと +- フォー +- ьян +- そのあと +- ▁кара +- ▁jeff +- 死者 +- ሰ +- ▁violently +- 読み +- giving +- ▁하나 +- чик +- 之下 +- 回り +- 조 +- တ +- umbi +- ▁convent +- ▁altered +- 成为一个 +- ▁conven +- 으 +- ▁affirm +- ▁المح +- ▁Kern +- angka +- ஃப +- 听见 +- 的经历 +- 燕 +- 赔 +- 」「 +- ▁pav +- ▁starke +- ▁tiger +- 且 +- 的发展 +- pois +- ▁busca +- らが +- ыт +- ความ +- ologische +- ▁другие +- の上に +- ▁comrades +- ゅう +- ত্র +- 充分 +- tez +- ▁petition +- 人群 +- руб +- tafel +- 到来 +- ▁sailing +- ఇ +- ▁милли +- 超え +- ▁Hü +- ▁четвёртый +- 取决于 +- yobozi +- لق +- 写作 +- strahl +- るために +- ▁edition +- ▁possibilities +- 社交 +- ▁attribute +- ▁enjoying +- サイド +- ured +- 手指 +- 叛 +- istische +- ▁tự +- 法庭 +- ▁бизнес +- 开玩笑 +- ▁academic +- ▁позвол +- 贷款 +- 为他 +- ▁Vall +- 比例 +- princip +- ەی +- ▁increasingly +- 爆炸 +- ▁heartily +- ▁japanese +- êr +- 巻き +- enfant +- ından +- ▁collabora +- undvierzig +- 尘 +- ▁whither +- 不怕 +- ką +- mēr +- ▁그러면 +- ▁diversos +- ▁medal +- ifies +- стары +- valent +- ▁ungefähr +- 쪽 +- ▁dagegen +- 高齢者 +- ▁তিনি +- ড় +- 記念 +- larda +- 态度 +- ▁leisten +- ▁связан +- ▁assez +- ▁stove +- 白い +- ებ +- 痴 +- ▁mới +- 殺害 +- 創 +- ▁align +- 特别的 +- ildi +- 
▁Karriere +- ▁laat +- richtung +- 真っ +- adores +- 疫情 +- といえば +- 맞 +- ▁proceedings +- 不一定 +- ▁throng +- дает +- 美国的 +- ▁genom +- 企 +- 泪 +- ▁greeks +- ▁compound +- ▁reporting +- escola +- どおり +- ▁butler +- ▁groupe +- ట్ +- дад +- めっちゃ +- 年前に +- three +- ҵ +- 闷 +- 詳しく +- 著名的 +- ▁envy +- 一眼 +- もらえ +- ▁punish +- يرة +- 骄傲 +- デザイン +- ▁menjadi +- 悩 +- を得 +- দু +- 進む +- 好事 +- 一座 +- ▁cargo +- 你再 +- 聞こえ +- 작 +- ▁ваши +- でしたね +- ▁platforms +- ▁clair +- るんですね +- ▁personne +- 现在我们 +- において +- ▁madness +- 본 +- ▁Wesentlichen +- ▁konuş +- ▁откуда +- tius +- енная +- вит +- يج +- ▁shops +- zep +- ží +- ▁Botschaft +- ▁devant +- ▁abraham +- ▁respective +- 马克 +- شو +- здоров +- ▁motives +- ības +- ▁encouraging +- 辣 +- 還 +- ▁möglicherweise +- ▁Realität +- 国外 +- 尼亚 +- larında +- 挺好的 +- ▁거지 +- ▁Ding +- exist +- 再生 +- ހ +- 揭 +- 高橋 +- gression +- ▁zusätzliche +- ▁karşı +- 做的是 +- ▁beobachtet +- õi +- ▁Genau +- үүн +- ▁anim +- 徹底 +- キン +- 下げ +- ▁şu +- 失敗 +- ▁Hab +- பட +- ▁Quel +- rono +- ▁Cer +- ▁تلك +- の動き +- 感覚 +- 続ける +- mania +- bala +- 给自己 +- લ +- ▁Strom +- ▁Што +- 精彩 +- ▁jew +- ▁Entscheidungen +- ▁customs +- ▁dismay +- 大学的 +- 视为 +- なお +- ஐ +- ▁lachen +- ドラ +- ото +- ▁porch +- 寄り +- ▁consul +- ▁dusk +- 全世界 +- ▁diverses +- ພ +- ▁excellence +- 她会 +- رن +- ▁concentra +- 議会 +- ▁Over +- ழ் +- 我们看到 +- ▁mach +- ▁Regen +- 万一 +- ▁Пер +- すべき +- ▁Frieden +- ▁шаг +- oph +- ▁viene +- 若者 +- ▁hostile +- 拿走 +- 注意力 +- į +- spiegel +- ていない +- 中国の +- dige +- 万美元 +- 发明 +- dorf +- ▁россия +- tesse +- зве +- шло +- ▁скоро +- ▁dentro +- ▁ngày +- してきました +- ▁metge +- ▁echter +- 聚集 +- 劫 +- ٹ +- ▁Führung +- ɛe +- ▁villain +- ▁deceived +- ▁eenige +- 的变化 +- つつ +- sieg +- ▁traurig +- ▁membres +- ận +- もう一度 +- 站起来 +- ありがとうございます +- tòria +- ▁trente +- ▁Imp +- čen +- ▁protested +- 试着 +- ▁welfare +- 福岡 +- ▁oui +- гр +- 病気 +- 某人 +- roning +- 中には +- ▁encountered +- 浩 +- 真实的 +- ▁vile +- ▁battery +- 提升 +- ου +- ира +- 摘 +- ▁Amerikaner +- ր +- ▁ambassador +- に入る +- ▁بش +- 这个故事 +- ▁tengo +- ▁fragments +- 舌 +- タイプ +- ิน +- 
▁relatives +- 提前 +- ▁secretly +- 晴 +- 我们俩 +- rührt +- ▁Center +- ▁martyr +- 邻居 +- ▁süß +- ▁exceptional +- ほうがいい +- 魏 +- 嫁给 +- 床上 +- 这真的 +- ダンス +- park +- ▁eigenes +- ▁naught +- ▁yapı +- 道歉 +- ▁catching +- 这份 +- ▁verstanden +- 透明 +- тся +- 的生命 +- ▁wichtiger +- 沮丧 +- '65' +- ▁montre +- ▁tarafından +- 依赖 +- ▁aufgrund +- ホーム +- ▁swarm +- 全力 +- 終わ +- ▁earthly +- 捉え +- ပ +- ▁expanding +- 懒 +- ▁Uganda +- 辆 +- ▁morris +- 唉 +- 有钱 +- gero +- ▁només +- 孟 +- ▁Nó +- ▁cơ +- できるよう +- ▁haunt +- かって +- しまいます +- ▁satisfactory +- 你为什么 +- 差异 +- てきました +- ▁unmöglich +- ▁давно +- ▁Wohn +- 做过 +- 厨房 +- ışı +- 到这里 +- ▁compensation +- ▁teh +- ▁aroused +- kulu +- cava +- ▁verses +- ▁каким +- ▁deliberately +- ▁zullen +- 爱你 +- lje +- ▁carl +- ▁계속 +- 消费者 +- 日本で +- schluss +- lıyor +- 広島 +- ▁أنا +- 常见 +- ▁Joseph +- ▁muscles +- ▁tarde +- 向上 +- ▁проблем +- ัน +- ▁cao +- 瞬间 +- onym +- バラ +- ▁Journal +- 緊急 +- ▁Winter +- ▁моя +- 更有 +- ▁بخش +- 的房子 +- ceea +- mişti +- ライト +- ▁calculated +- ▁général +- 茂 +- 심 +- ▁cuatro +- 生まれた +- ▁travelled +- 成就 +- かつて +- ▁joc +- ▁parting +- ▁hört +- ▁sống +- stup +- meze +- щен +- ▁адна +- ским +- ▁attending +- ▁crest +- 把握 +- ▁cellar +- ▁الناس +- beck +- 男性が +- 好吃 +- னு +- ▁justified +- ້າ +- рв +- 动力 +- ▁Straf +- どのように +- लाई +- ▁woke +- ▁fascinating +- صح +- ▁vrai +- に住む +- cite +- ▁Fest +- 狱 +- ▁tumult +- かく +- けない +- ්‍ය +- ▁дух +- ▁holland +- 仇 +- ▁inspector +- ▁Dol +- 大小 +- 誉 +- ▁Jahrzehnt +- みましょう +- ▁Gall +- ▁hiện +- 停下来 +- 居住 +- wandel +- мел +- މ +- 悠 +- ▁slaughter +- 肝 +- の前に +- 粉丝 +- ▁swamp +- を集め +- ▁befand +- ▁пятьсот +- ▁Edward +- 加油 +- ▁femmes +- geschlagen +- ▁wilson +- ▁failing +- ▁Früh +- ▁manuscript +- めの +- ▁000 +- 情报 +- quadr +- ▁observing +- ถ +- ▁tommy +- 看过 +- するよう +- ▁twas +- ▁Labor +- ▁jahre +- ▁cruelty +- ▁flowing +- писа +- ▁Other +- ▁trenta +- ▁corrupt +- 如果说 +- 厅 +- 계 +- igihugu +- ▁ກະ +- ▁causing +- шча +- 少爷 +- ▁severely +- ▁dünya +- leiter +- ティング +- အ +- yel +- ических +- ▁nineteenth +- ማ +- 成功的 +- 的看法 +- ▁Traum +- ▁Gui +- ▁edit +- 
▁exterior +- 思维 +- held +- 色の +- ▁sincere +- لىق +- ▁sever +- ▁considerably +- 有一点 +- logi +- ▁också +- 团体 +- ходят +- ▁colonies +- berries +- kunde +- ▁oldu +- determin +- ▁poets +- 与此同时 +- 貴 +- ▁incremental +- bereit +- ▁lowered +- ໍ +- 激动 +- ▁Gas +- 富士 +- зер +- ▁Sar +- なるほど +- ▁cooper +- قب +- 轻松 +- ographic +- ▁uneasy +- ▁ancestors +- ște +- core +- ▁colli +- ▁okw +- ının +- 損 +- 経営 +- 変わる +- modul +- valuation +- 说的是 +- தன் +- 状態で +- bû +- ▁مخ +- lò +- ▁должна +- ▁confine +- насці +- ▁devour +- ▁shaft +- ▁purchased +- mwi +- 物种 +- 僕の +- 찮 +- 阿尔 +- ▁paradise +- ▁glare +- альная +- евич +- ▁rejected +- ▁стать +- シェ +- ▁Glas +- 艰难 +- ▁lily +- ▁flora +- 之一是 +- ▁aliaj +- ▁starb +- 働き +- ▁Seit +- '.000' +- ▁Zweck +- ▁Fähigkeiten +- 災害 +- なこと +- 迈 +- ▁commissioner +- フリー +- 피 +- ▁verändern +- 冒险 +- adel +- ▁begonnen +- 小学校 +- 后果 +- '32' +- ubuzima +- 的身份 +- ▁allah +- ▁screw +- 早い +- ▁Strateg +- ách +- ▁fram +- igte +- ▁заяв +- ▁dritte +- പ +- ▁iemand +- хә +- 两人 +- рым +- jja +- ▁vot +- われて +- ▁nuevo +- ▁erklärt +- せず +- އް +- ▁zufrieden +- 歩いて +- ▁declaration +- AI +- 的文章 +- ▁مردم +- おう +- technologie +- ▁книг +- rani +- 保守 +- ▁confer +- ▁questi +- affi +- ▁mijne +- ▁hither +- 这些事情 +- ▁twentieth +- ▁White +- ▁hue +- jih +- garde +- დე +- ▁greeting +- ebwa +- ▁Sachen +- ▁gladly +- 稿 +- ▁encuentra +- ウイルス +- خی +- سة +- ▁sour +- ▁equi +- ▁tempest +- ▁провер +- rava +- द् +- 伙 +- تها +- ▁psycho +- ▁blocks +- ▁Öffentlichkeit +- 暂时 +- ▁shopping +- 他要 +- ▁disagreeable +- ▁dominion +- 井さん +- வாக +- ▁marketplace +- ▁compromise +- 浜 +- いただきたい +- 作る +- 위 +- YouTube +- ziehung +- ▁terrorist +- 随后 +- ▁seltsame +- 血液 +- وش +- ▁tragic +- ▁danced +- ▁excessive +- мыш +- fari +- sail +- ็ +- 是一名 +- ▁gaf +- ▁invested +- 深入 +- 奴隶 +- ੀ +- ▁destined +- ▁böse +- 夕 +- ается +- 人士 +- ▁gulf +- そうで +- 很快就 +- ▁eldest +- ▁Angeles +- 従 +- வின் +- 我非常 +- prüf +- 車の +- 纪 +- ▁placing +- 営業 +- ▁lovers +- ▁tornar +- leistung +- 東海 +- 苦し +- kuza +- ▁然后呢 +- vuze +- 虐待 +- فن +- trans +- ▁membre +- 
したということです +- maschine +- ▁Ash +- laba +- ▁Stre +- そうか +- ையில் +- 恐怕 +- ▁footprint +- ▁gush +- kiza +- گیری +- 公式 +- ▁Ruhe +- ▁imperfect +- ▁posted +- prop +- 的目的 +- ▁prospects +- ▁Version +- このような +- ކ +- ո +- கிற +- ▁arrange +- ▁мяс +- ▁transportation +- 装置 +- ▁Dü +- itel +- ļu +- คุณ +- 你喜欢 +- ▁basa +- vro +- ▁নাই +- ▁Euro +- praw +- ▁militar +- ▁Fel +- теп +- ▁twisted +- 筹 +- ▁fosse +- ▁юу +- ▁Fred +- ▁harbour +- slov +- pov +- ▁கட +- ອງ +- 你没 +- лэг +- ▁недо +- ▁wishing +- 上げる +- 扱 +- ▁retorted +- 疑い +- ▁އެ +- ivity +- Ra +- cado +- 不管是 +- 无论如何 +- 即将 +- ▁aange +- ாத +- ▁lifetime +- вання +- 哎 +- kräfte +- irira +- ▁defeated +- დი +- ▁llega +- ▁слово +- ▁آل +- ▁Kurs +- ▁lebte +- ▁Med +- گرد +- 制裁 +- 다가 +- 歇 +- ▁broader +- quant +- ▁slate +- ▁запад +- を終え +- ▁crawl +- алт +- нула +- ▁рыб +- ▁verstehe +- зм +- 严重的 +- ▁mereka +- 要不然 +- ありまして +- ▁Central +- ▁amazon +- ▁دارند +- ниц +- ▁mater +- ▁regulatory +- ▁Verfügung +- 丘 +- 已经被 +- ▁offerings +- ▁Tio +- லே +- 一路 +- ёл +- ర్ +- ▁gevoel +- お天気 +- ▁chorus +- ▁shallow +- 祥 +- ▁дерев +- 的其他 +- amaga +- ▁trata +- іст +- Reg +- ▁Monaten +- ▁schwach +- ▁Bio +- rew +- ▁cinco +- ▁Vertrauen +- オンライン +- 辩论 +- 说到 +- ▁dön +- 宣言 +- ってきました +- ▁roi +- ないか +- 下载 +- 这将是 +- ω +- ▁phenomenon +- اغ +- 売り +- кас +- ▁foam +- ▁tako +- ▁trumpet +- kür +- 野球 +- 行われる +- 旦 +- ķ +- ▁platja +- خانه +- 打电话给 +- ▁bloody +- かける +- ▁attentive +- ▁reckless +- 倒是 +- のほうが +- ▁publicly +- 教师 +- пля +- 護 +- 不相信 +- 山口 +- ▁gefährlich +- ▁johnny +- ▁punct +- فهم +- 形象 +- ▁lump +- 神奇 +- 不是吗 +- 赚钱 +- 有意义 +- জি +- ▁йо +- ▁bonds +- 겠 +- 糟糕的 +- ▁shaken +- ປ +- estudi +- ▁prose +- ▁chains +- 伝わ +- cac +- ▁wist +- قت +- ▁spectator +- 庫 +- ▁comparatively +- ▁zulk +- ▁notable +- ▁Пры +- cida +- ▁sustained +- volk +- ▁сторон +- ▁Kongress +- いたのは +- ần +- ▁pow +- ▁waving +- ▁breathed +- 供应 +- ▁höchst +- ▁получил +- ▁juda +- 僕も +- ▁Manchmal +- ▁banner +- үз +- ▁corpse +- 心中 +- ▁einiger +- 确实是 +- bericht +- ▁Orte +- ▁Angel +- 服装 +- 柴 +- ▁Аднак +- 三天 +- ▁Umgebung +- 
▁Flor +- ▁corte +- ▁migra +- ▁expanded +- ▁positioned +- 始まる +- ▁تش +- ▁Бу +- 线索 +- чал +- ▁Big +- 的父母 +- 融 +- ▁philadelphia +- ▁concrete +- ▁hacia +- いるので +- ▁niemals +- gyn +- ▁schneller +- ▁compris +- ることで +- 有一次 +- 凤 +- ▁tribunal +- ▁engineers +- таб +- drücke +- ▁voran +- ▁dumm +- 싶 +- вшись +- ▁anstatt +- ront +- なんとか +- garten +- 恐らく +- коло +- '34' +- 等于 +- acqua +- 觉得自己 +- ▁manier +- ▁voz +- ことが分かりました +- だけでなく +- ▁بالا +- ▁большой +- 的第一个 +- 누 +- 整体 +- яў +- 这个世界 +- ▁dishes +- たくさんの +- ▁över +- ▁shocked +- 经历了 +- 栄 +- ▁pleasing +- ابت +- 物理 +- koresha +- ▁ojos +- ▁Parliament +- いらっしゃる +- பதி +- 一杯 +- 標 +- 难以 +- ▁dien +- ▁buena +- ▁якія +- の間に +- ▁naval +- ▁teori +- 责 +- ▁plato +- өрө +- kera +- ▁searched +- ▁remembering +- ▁ceva +- ▁interpretation +- ▁месте +- ▁deemed +- ▁positiv +- azioni +- ▁exhibition +- ▁davant +- ▁dome +- 少女 +- geschäft +- 难过 +- sanga +- ▁loudly +- ▁sinner +- werp +- 马上就 +- rima +- ▁থেকে +- 玻璃 +- رح +- ▁begannen +- ▁cherish +- ▁bail +- ่อ +- ▁Ara +- потреб +- ආ +- ເ +- 보다 +- preis +- ▁McC +- ▁creep +- ▁behandelt +- フル +- ▁fees +- اك +- 打破 +- ستر +- лены +- ▁drowned +- 卑 +- ▁unabhängig +- ৰি +- きれい +- ▁mô +- 知道我 +- treffen +- ▁People +- 你今天 +- 还可以 +- 最初に +- 新型コロナウイルス +- ▁announce +- ▁ocup +- ▁решение +- 请求 +- issant +- trek +- ▁bout +- ▁Great +- 自ら +- ▁نوع +- ▁ribbon +- おいしそう +- ペン +- ▁ibintu +- ziri +- нский +- るんですか +- ▁திரு +- ancia +- ▁exploit +- 厳 +- ▁contemplate +- كە +- 工厂 +- foc +- ▁inquiries +- ▁observer +- ▁entry +- cora +- িত +- ивает +- ҙе +- hield +- equip +- を与え +- 相同的 +- ▁practic +- ▁smallest +- 年度 +- ▁unity +- 値段 +- 就像我 +- ৰা +- 一篇 +- ▁reducing +- ▁مص +- ▁wrist +- 小さい +- 遇到了 +- ▁injustice +- ච +- 就是我们 +- ▁gasped +- 之中 +- ▁zouden +- home +- 它们是 +- ▁مختلف +- 蔵 +- ▁Madame +- ▁manche +- 婆 +- intel +- ようになった +- ▁দেখ +- ▁controlled +- ▁fazla +- ▁coral +- remos +- ▁bending +- 瞒 +- おっしゃって +- ▁Não +- ▁provinces +- ▁peng +- ▁мала +- 你可以看到 +- ▁florida +- ▁Estas +- ▁disclose +- 说服 +- 错过 +- 浸 +- 認 +- ត +- ▁flexibility +- 
▁entertained +- 実施 +- ▁diamonds +- あんた +- 这首歌 +- ▁frac +- 生きて +- '46' +- ግ +- の名前 +- ▁thorough +- ▁multipli +- ким +- ▁dahin +- 尸体 +- ▁seeds +- 遅れ +- asso +- boj +- bě +- grand +- ▁Mini +- 因为她 +- යක් +- 仔 +- 岳 +- ▁Lass +- liku +- ием +- 花园 +- ▁besoin +- ▁anos +- 诸 +- ▁oogenblik +- ニューヨーク +- ▁può +- ▁British +- 提到的 +- इ +- 出口 +- ▁legacy +- ▁fraud +- ▁corporation +- 見えます +- ัก +- 歌手 +- بخ +- ј +- 藤さん +- ▁있었 +- 找我 +- ▁Hinter +- 並 +- ▁vraiment +- ▁bann +- வ் +- টে +- デビュー +- ▁consolation +- 欣赏 +- 変わった +- 摆脱 +- 我当时 +- 这句话 +- ▁loans +- ▁verkaufen +- 飾 +- ▁Sex +- 飲食店 +- валь +- ▁alexandr +- ▁нормально +- 所谓 +- ▁вещи +- 余り +- υ +- ▁drill +- ▁preceding +- 斗争 +- 我们从 +- ▁Stand +- 的母亲 +- länder +- imiz +- 一些事情 +- ▁zweifel +- ▁quanto +- ▁salon +- 穆 +- ▁misschien +- ▁Motor +- ▁prev +- qü +- ebla +- 指定 +- szcz +- ▁ambitious +- 面包 +- uganda +- 雾 +- 営 +- بور +- рин +- ▁tribu +- 元素 +- ▁alongside +- 哀 +- 했는데 +- ▁negli +- ▁أنها +- 第五 +- ▁Rede +- ▁Nova +- ซ +- 始めて +- ▁fuck +- にとっては +- 问问 +- ▁tenemos +- 狭 +- το +- 人たちが +- ▁kya +- ▁Ing +- ▁हो +- こうして +- obten +- ▁العالم +- 我才 +- ▁antaŭ +- ▁yielded +- ▁treu +- 说说 +- ында +- slag +- ▁bella +- ▁руки +- 편 +- 劇 +- ▁krijg +- '700' +- 输入 +- ▁erzählte +- ▁quasi +- ▁hommes +- 何度も +- lý +- 逐渐 +- ▁банк +- ▁komt +- 咩 +- насць +- ▁dürfen +- ▁прад +- ▁இட +- ங்களை +- noma +- რე +- かれた +- ыць +- 邪恶 +- ▁ĉiuj +- 怎么做 +- 地域の +- ▁cough +- ▁многие +- 膜 +- دام +- ▁calculation +- ▁студ +- 农民 +- に向かって +- ▁machinery +- fühl +- ▁jonge +- 这就是我们 +- 活用 +- 长时间 +- Ө +- モデル +- ▁mandat +- ▁neighbours +- 狙 +- వా +- рот +- ▁clue +- ▁mentre +- алга +- شون +- 这样的人 +- ▁fanden +- ▁weekend +- ▁heroic +- ▁Kamera +- ▁recommended +- ▁Komp +- ▁geschlossen +- ▁Hintergrund +- ▁normally +- ▁viu +- 感染拡大 +- ▁كۆ +- eqq +- 代理 +- ▁زمان +- بدأ +- alde +- ▁түүний +- ▁elector +- 看一下 +- ▁thor +- لند +- ▁побед +- うえで +- 竜 +- ▁проблемы +- ▁стране +- aggi +- چە +- ▁selle +- ストレート +- Ү +- 燃料 +- ▁constructed +- ▁henri +- stricken +- ▁прошло +- ▁misma +- шке +- рош +- èl +- ▁ehemalige +- 亿美元 +- orna 
+- тая +- న్న +- ということになります +- 青年 +- gezeichnet +- ረ +- Ingenieur +- ショット +- ▁tricks +- 我可 +- ßen +- ▁glowing +- 노 +- ▁hymn +- ▁olive +- ▁towers +- 莲 +- sponsor +- 自体 +- энне +- фо +- ▁feeding +- ▁nueva +- 下一 +- ▁کردند +- makers +- biro +- 合法 +- 在线 +- andra +- ▁spraw +- 再来 +- პ +- 培养 +- 乔治 +- いただきました +- ▁знает +- ▁mason +- gence +- umbu +- どころ +- ▁داشته +- போ +- ▁downstairs +- ▁بم +- 出た +- 是一个非常 +- ▁judged +- ល +- muka +- ▁ashore +- ▁Ausbildung +- opera +- hour +- بوو +- 祸 +- ▁collapse +- 的角色 +- 2% +- 挨 +- ŵ +- 脂 +- vallen +- 急に +- ▁campo +- fili +- ბ +- тары +- mien +- ศ +- ▁intercourse +- ผ +- ▁Rahmen +- пита +- ▁gast +- 好奇 +- 轨 +- 乐队 +- ование +- 没有办法 +- 脑子 +- ▁traveling +- ural +- 笑话 +- ▁polític +- どこに +- 这些是 +- 면서 +- ▁Wy +- ически +- 话说 +- jó +- 過ごし +- ической +- 鈴木 +- に入れ +- jährige +- kurs +- ▁formidable +- ▁pinch +- ▁assigned +- ▁Können +- ▁verdienen +- уют +- werte +- ▁fluid +- ▁پێ +- брос +- ▁avoided +- чих +- ▁memiliki +- バランス +- ▁kell +- ▁Anem +- ▁richtigen +- сси +- ▁amazed +- برد +- بال +- ▁Quant +- ▁могли +- вест +- ▁supplement +- ▁Werkzeug +- 暴露 +- unch +- ▁terrace +- voor +- 戏剧 +- 大好き +- ète +- 姜 +- ▁어떻게 +- ▁Figur +- raba +- ▁sina +- 最佳 +- 廷 +- 八年 +- ▁Rücken +- 大夫 +- lustra +- ▁flush +- ▁difícil +- ▁rejoined +- ▁Oni +- رز +- ▁reinforce +- 女的 +- ▁patterns +- ありますね +- avais +- ▁ceux +- çar +- 膨 +- ▁triste +- 場面 +- ちゃって +- луу +- шиг +- கூட +- 成分 +- ▁senza +- ▁опас +- ▁negoci +- flamm +- wirtschaft +- もそう +- 五百 +- 标签 +- ▁Auge +- woord +- を守る +- 坑 +- アジア +- ▁것도 +- ▁vaccin +- 隐藏 +- ▁côté +- теля +- 复杂的 +- bö +- ▁shells +- 크 +- 履 +- それだけ +- prise +- control +- zwei +- ▁parlament +- Italia +- 邓 +- ▁alto +- ▁chuck +- していない +- ならない +- ▁yaşa +- ให้ +- альна +- шёл +- ▁Obama +- ▁Road +- ▁exclamation +- ▁tới +- شكل +- 给予 +- 有利 +- 現実 +- 跟我们 +- 世界各地 +- astro +- ▁More +- ▁Vergleich +- だということです +- 晃 +- 爆发 +- 父親 +- elimina +- ▁completion +- ▁kostenlos +- ▁wussten +- 年以上 +- 这么多年 +- たかった +- ▁acute +- таў +- ▁conquered +- ▁benshi +- ▁Sau +- ▁torch +- ▁мире +- 
▁maintaining +- ▁spider +- child +- ▁baker +- ▁тийм +- 鍋 +- ▁dasselbe +- ▁Best +- ▁offend +- ▁язык +- ▁insects +- 踢 +- ਸ +- ئی +- لَ +- ▁agitation +- Alb +- ▁Bord +- ▁göre +- ▁Quer +- ▁attach +- ▁sinking +- 这可能 +- 呼び +- 朕 +- 彭 +- ▁reluctant +- ியா +- zukommen +- ике +- ▁좋아 +- 国防 +- 掘 +- ▁Museum +- ▁saba +- ▁Zug +- ▁mußte +- лена +- ▁لن +- ▁độ +- 老鼠 +- ▁öffnen +- ▁vorne +- ▁batter +- できません +- Vi +- 资格 +- ▁hazard +- gata +- ▁nancy +- ▁гос +- ▁economi +- 太平洋 +- んじゃないですか +- ▁выгляд +- ▁الأمر +- ▁marvellous +- 西班牙 +- ходить +- ▁Party +- ▁stack +- دىن +- 但我们 +- ▁muchas +- ▁ҡу +- čí +- ▁halbe +- 葡萄 +- ▁Pil +- ▁dwelt +- ▁havis +- ▁adoption +- ▁différent +- 各种各样的 +- 당 +- 会社の +- gebrochen +- ség +- 的消息 +- 勢い +- ின +- ▁participation +- ▁fühlt +- 灵魂 +- 叹 +- дея +- 指控 +- ▁möglichen +- schlaf +- icio +- lili +- ▁aufgenommen +- 各地で +- ▁logical +- 目が +- ▁котором +- ▁competitors +- 待ち +- 配合 +- ▁لأ +- ▁adjustment +- 足球 +- ▁muti +- ▁germ +- ▁이거 +- の情報 +- labora +- っち +- 古代 +- пат +- ▁cai +- 自杀 +- 打击 +- ▁charlie +- лага +- くれた +- ▁север +- kenntnis +- 可爱 +- 公里 +- 看起来像 +- ▁virtual +- ▁guitar +- streit +- gawa +- ▁Such +- 1% +- ▁weiße +- ▁ivan +- ▁خلال +- ▁Wohl +- ▁leise +- ansi +- 逻辑 +- 하게 +- ▁oxford +- ште +- ▁revel +- ▁sahip +- bintu +- ▁tien +- ▁trauma +- ▁trước +- ▁honourable +- ▁probability +- Est +- ▁Mak +- gereza +- 矢 +- ▁lớn +- 回復 +- ▁brisk +- rende +- プレ +- emis +- 我开始 +- ▁potent +- 客气 +- 几次 +- 还记得 +- това +- ▁mevrouw +- ▁eighteenth +- ывает +- ▁fing +- wohl +- こちらは +- 是对的 +- iện +- jana +- 得更 +- 连续 +- ▁süre +- ائل +- தாக +- 在里面 +- 走り +- ▁ardent +- 的事儿 +- buk +- ▁chuyện +- wuchs +- زى +- ▁allgemeine +- 肠 +- 吻 +- 抱歉 +- 得太 +- ゼロ +- 这个东西 +- ▁March +- 听说过 +- ▁زمین +- ▁fragment +- ▁بودند +- 舒服 +- ジャー +- öpfe +- ▁dieselbe +- 要請 +- ▁reasoning +- modell +- لات +- xxam +- 斯坦 +- 的天气 +- ▁خل +- ▁cùng +- introdu +- 有名 +- Й +- 稀 +- meni +- ▁Proto +- 这是你 +- vocation +- 大丈夫です +- ▁плане +- なもの +- ▁Erfahrungen +- しましたが +- 賃 +- ▁welcher +- ▁riep +- ▁legisla +- けた +- ▁мной +- hong +- ▁você +- 
▁baseball +- ▁slap +- objet +- ▁Nda +- ▁شيء +- ಯ +- ijas +- vēl +- ĝo +- mada +- ▁mystic +- EC +- 課 +- ▁experts +- 杂志 +- 昭和 +- 因为这 +- ▁yose +- ▁preference +- ▁Flug +- 簡単 +- ▁impatience +- 쓰 +- プレゼント +- หน +- ▁ولی +- ▁slay +- ▁så +- 今後の +- ▁числе +- ▁ຢູ່ +- ▁хотите +- ▁никаких +- ▁நட +- lette +- mong +- していると +- ▁več +- ▁dismissed +- ▁Wissenschaftler +- ▁liquor +- ▁pursuing +- を目指す +- glaub +- бро +- ▁buff +- 下班 +- ▁ilk +- ▁Untersuchung +- ▁Tradition +- ▁linked +- ▁knit +- ▁successor +- linie +- ▁Matt +- ▁количество +- ▁French +- センチ +- நேர +- ário +- ▁insect +- aigua +- qq +- アフリカ +- ރު +- キング +- の一つ +- ▁converted +- ▁vault +- wain +- schel +- samkeit +- ỉ +- ▁personnes +- ▁staircase +- 咨询 +- ▁slumber +- ▁Со +- corr +- schicht +- ▁clasped +- sigur +- ▁concur +- 姉 +- ▁hẽe +- ▁pueblo +- ▁Cat +- 任何事情 +- ▁جهان +- 去哪儿 +- нных +- marin +- kaya +- ▁Todes +- ләр +- ▁Gan +- ੇ +- ▁routine +- 竞选 +- 如果是 +- 生病 +- ▁punished +- ▁libre +- قات +- ▁bamb +- ▁demonstration +- ▁retained +- ▁nhìn +- ▁엄마 +- ▁Worten +- kapa +- ල් +- ▁siege +- ▁üç +- を伝え +- 女生 +- ▁schützen +- ▁família +- 严格 +- ▁singer +- 青春 +- ▁Besitz +- ▁poems +- しております +- 考试 +- わら +- 女の子 +- バル +- ▁Merc +- ▁scope +- なきゃ +- 不是一个 +- ▁loyalty +- 躺 +- 研究所 +- ▁juffrouw +- 英尺 +- ▁verkauft +- груз +- ▁jongen +- 贝尔 +- ▁أع +- ▁pai +- 读书 +- 现在已经 +- 问道 +- 很长 +- щих +- esca +- ckel +- ▁thanked +- ▁Produktion +- ▁Milliarden +- 子供たち +- ▁bodily +- gada +- 鉄道 +- گل +- 显 +- ▁Both +- ▁carrier +- fér +- aime +- 的许多 +- arrêt +- profit +- ▁breathless +- いたら +- 妖 +- が一番 +- ▁verbessern +- 瘦 +- ▁mall +- ないので +- ▁traces +- ▁timp +- 后悔 +- téri +- 向前 +- یز +- 範囲 +- ▁dealt +- 乖 +- ▁desirable +- 去看看 +- 考える +- ▁erster +- лик +- ▁рассказыва +- サイト +- ıldı +- клон +- 即使是 +- ▁Home +- ngingo +- 際に +- ▁abode +- してます +- ▁всю +- ▁près +- 興味 +- 街道 +- wè +- ški +- ▁precaution +- 芽 +- ▁원래 +- 解决方案 +- ▁이러 +- 届け +- ▁collective +- ▁pious +- kina +- ▁Struktur +- tata +- 든 +- ▁trotzdem +- AR +- ▁offensive +- おき +- Tech +- ▁Ал +- 最后一个 +- ▁Dorf +- ▁Deutschland +- ちゃんの 
+- してほしい +- ▁streng +- வும் +- ▁horrid +- ▁Kontakt +- ▁molly +- 牧师 +- sprache +- ▁Haushalt +- 昌 +- ▁Fünf +- ▁regain +- ▁Ländern +- 考えた +- 一起去 +- ህ +- ▁terrified +- ▁learnt +- ▁witnessed +- ▁trov +- ▁keiner +- ▁Beziehungen +- 把我们 +- زل +- ▁amafaranga +- 起来了 +- ▁franchise +- ▁abundance +- ▁atlantic +- ▁airport +- كس +- せない +- kong +- ▁conclu +- 的态度 +- 的音乐 +- ▁Sind +- 蜂 +- ▁nữa +- たんですけど +- 回报 +- ுடைய +- ▁domini +- ▁shillings +- ▁encara +- ▁entgegen +- ţă +- виз +- ▁обще +- ަށް +- ▁Verwaltung +- ▁شروع +- ▁Aktivität +- 癌症 +- yandi +- ▁seulement +- 得好 +- esprit +- yaga +- 想办法 +- ▁Francisco +- の予想 +- ▁Wein +- 晶 +- ït +- تنا +- ▁serie +- ▁characteristics +- ▁mesmo +- ▁Schulter +- 阔 +- ▁کے +- laki +- nood +- 的状态 +- sett +- フト +- ▁Virginia +- メーカー +- ▁acum +- ▁Vila +- muş +- кана +- カラ +- ▁tract +- ▁шар +- fordern +- スマホ +- 季節 +- ▁داده +- ново +- 減少 +- 任何东西 +- ▁части +- ები +- යේ +- へん +- ▁consolid +- 惩罚 +- ▁Krebs +- ▁pregunta +- ▁дараа +- ▁barri +- ▁кроме +- ▁поле +- 受欢迎 +- коў +- lux +- 柜 +- iek +- 店舗 +- itari +- 参考 +- भा +- ▁договор +- ▁recess +- atura +- 识别 +- ▁bieten +- ▁என +- 換 +- ▁Fortschritt +- ▁trotz +- ▁youngest +- कार +- 对对对 +- க்கிற +- 跑了 +- 予約 +- 颗 +- ▁lawyers +- ▁своим +- ▁Nya +- 嫂子 +- ▁mining +- ▁submitted +- ▁кил +- ▁guided +- 女性の +- 안 +- 迁 +- ทํา +- ▁bắt +- ওয়া +- 温泉 +- नी +- ▁bike +- ▁tossed +- ஸ்ட +- ▁Brand +- ▁ثم +- ▁Ти +- 纠 +- ▁சரி +- 었어 +- ▁emerged +- ▁versuche +- これまでの +- 包含 +- ▁offended +- ▁già +- ▁passer +- 您说 +- 锦 +- klin +- ▁rechten +- 地球上 +- тара +- ▁machten +- 下次 +- ▁privat +- 疾 +- ను +- ▁slice +- தற்கு +- ▁destination +- てしまった +- дали +- 你可能会 +- ▁comprehensive +- ワイ +- 数が +- τα +- amiento +- рать +- ▁Theorie +- らせ +- Music +- ▁columns +- ▁зрения +- 坊 +- ▁incapable +- 내 +- 一根 +- ▁Jun +- ▁guerre +- ▁prudence +- ▁spielte +- жим +- kiwa +- කි +- ▁relax +- ifiziert +- ▁Slide +- ▁errand +- ▁drawer +- 年生 +- 落とし +- てない +- ▁reserved +- ▁мира +- 惹 +- 鶏 +- ▁suffice +- ▁premium +- ▁handful +- été +- ▁олон +- であれば +- party +- ▁истории +- 看待 +- ▁работы +- 
▁اینکه +- ▁borders +- 最大の +- енным +- 終了 +- čno +- ▁winding +- 加拿大 +- あんな +- ▁Johnson +- ってください +- beera +- ▁dreaming +- ▁tropical +- 方案 +- ویل +- ▁georgia +- සා +- ▁있고 +- ▁amidst +- 扯 +- เขา +- ▁emerging +- ▁Roger +- ▁projet +- ستی +- ▁Gel +- ▁drap +- ▁spit +- hund +- мак +- 议员 +- 际 +- zusetzen +- ピッチャー +- 意大利 +- விட +- رض +- ▁rép +- ▁хө +- ▁Long +- 带来的 +- ▁слож +- 扮演 +- нк +- ▁அறி +- ▁converse +- 超越 +- 引き続き +- JR +- 大手 +- fowl +- pata +- ▁goddess +- 妃 +- ▁commend +- ディー +- рис +- ▁Hotel +- ラスト +- ním +- rän +- gah +- 多个 +- 教え +- 佐藤 +- ▁boldly +- 悩み +- ▁которого +- 自転車 +- ちゃんが +- 核心 +- vacu +- ▁resent +- ▁último +- 的大脑 +- 发言 +- cule +- ▁wählen +- ― +- 辱 +- 강 +- ruka +- 傾向 +- еду +- ▁reicht +- ▁répondit +- дин +- 绳 +- 雕 +- 你知道我 +- 工作人员 +- ▁boiling +- ▁моск +- 顺利 +- ▁polic +- terior +- ▁sect +- 荣誉 +- хэн +- わせ +- irdi +- itatea +- ▁draft +- 的衣服 +- تور +- 3% +- 堵 +- யோ +- ාව +- να +- raub +- 悟 +- 侯 +- ▁entschieden +- ▁Zeichen +- 感謝 +- ▁Planeten +- ▁fueron +- 就知道 +- ▁loi +- iteit +- ▁gestern +- ▁otras +- ▁Öl +- ▁neighbour +- ▁standpoint +- сар +- 完全に +- 不舒服 +- VTR +- ▁spaniards +- ▁Sozial +- ▁dirt +- ▁hwn +- 颜 +- 晨 +- abanya +- рий +- ▁chiar +- 斤 +- ▁getötet +- ▁unnecessary +- bizi +- brew +- ▁پار +- Plattform +- ▁gewonnen +- 規制 +- 发送 +- を使う +- ▁priority +- ▁Geheimnis +- ▁suspended +- 同時に +- geteilt +- ▁willingly +- ▁seien +- ▁Unser +- 不动 +- ▁Fund +- AS +- ▁nama +- ކަ +- 感动 +- ţii +- american +- 你们都 +- ▁justify +- ▁steward +- ▁Kim +- 舅 +- мур +- 袭击 +- اشت +- ▁реально +- 正直 +- 没问题 +- līdz +- 最後は +- 阶 +- 남 +- ▁bản +- ▁이게 +- ▁இரண்டு +- 歴 +- ▁нэр +- ▁Wol +- ▁clung +- 大使 +- ▁Flo +- ▁Industrie +- udge +- ▁massa +- ▁此外 +- ▁clergy +- のもと +- 免疫 +- ▁работает +- 相撲 +- ▁থাক +- ▁candidate +- かなと思います +- 跟大家 +- ordre +- hala +- ▁canadian +- 小朋友 +- aña +- ▁coordina +- teeka +- plin +- といって +- 抗议 +- 羞 +- ង +- ▁spreading +- ▁sadece +- ▁einzelne +- 的任务 +- ▁Tā +- ▁sinh +- ▁allies +- ▁behave +- ▁wildly +- ▁generated +- パラリンピック +- 这段 +- 明星 +- سبب +- 時間を +- ▁strand +- presi +- 준 +- 抱怨 +- 首脳 
+- ▁Market +- ▁Harris +- ▁Unit +- atrice +- boden +- 清晰 +- ماذا +- 大学の +- ţe +- விட்ட +- ▁innovative +- ▁database +- 肯定会 +- ▁capa +- ▁actua +- morph +- ▁einzelnen +- bali +- ▁juan +- いだ +- 忍受 +- ▁creator +- ▁Denk +- urwa +- ս +- ▁artillery +- މަ +- ▁توسط +- ▁gilbert +- んや +- stürzt +- ▁telah +- あい +- ▁situé +- 笔记 +- ってくれ +- ضحك +- ▁anthony +- wissenschaft +- しゃ +- ▁tante +- 一点点 +- ▁большая +- menya +- ӗ +- ရ +- 规划 +- ▁vilaĝo +- 合って +- ▁preacher +- ▁ноль +- ▁verder +- 从事 +- ▁spreche +- ▁potatoes +- ▁corpo +- था +- ▁faz +- ਨ +- ▁geliebt +- 放松 +- 的产品 +- ▁Angriff +- ▁thật +- ▁irgendwo +- curi +- 高校生 +- ▁petr +- ▁Their +- проч +- ニュー +- ▁spared +- ފ +- 这个节目 +- 戒 +- ▁Nachde +- 千葉 +- ibyo +- τη +- 就好 +- ▁fortnight +- 周末 +- 見ていきます +- 我想知道 +- хам +- ▁consumption +- ▁আৰু +- ▁sympathetic +- ▁Konzept +- んだよね +- ▁Geräusch +- んだって +- ▁subsequently +- ▁Russia +- ▁کسی +- ことがある +- ▁afforded +- ự +- まえ +- pfa +- flug +- ▁queste +- ▁progressive +- 似的 +- mita +- кет +- ▁sentir +- 桌子 +- ▁хочет +- ▁Rasse +- ▁Fällen +- anın +- があるんです +- 築 +- рван +- と共に +- ▁Dick +- マジ +- 此时 +- ▁Spo +- ▁bonnet +- ҥ +- āju +- 扩大 +- 这两 +- 体重 +- ෙන් +- шил +- مَ +- 也能 +- tiere +- 但在 +- 唤 +- ▁çocuk +- ▁heutigen +- 冬天 +- ▁Put +- 彼らは +- ▁guarded +- ▁نشان +- 向我 +- قام +- stained +- ▁estoy +- 靠近 +- ▁protein +- ப்பட்டது +- 争论 +- ați +- ่น +- ▁buchstäblich +- ▁усё +- ▁Lor +- тура +- ▁pensi +- まん +- وض +- ▁evangeli +- 是非 +- полага +- 原始 +- عي +- ிலும் +- мян +- 不信 +- afi +- ድ +- 拓 +- ふだん +- 消失了 +- ▁يتم +- ▁amusing +- ▁punch +- ▁när +- 听到了 +- 资产 +- ▁Kath +- 哭了 +- ▁tremble +- ▁leiden +- ánh +- 有助于 +- ▁prosper +- itse +- ▁عليه +- рев +- சை +- ▁Haut +- ▁внимание +- 详细 +- ▁있잖아 +- 投資 +- ▁garanti +- град +- ▁가서 +- ▁другом +- ごと +- 新鲜 +- ▁faintly +- ▁Amazon +- ▁Ан +- 耀 +- ▁Erklärung +- IC +- 反而 +- quarter +- ▁来一首 +- 性别 +- 罩 +- ▁charlotte +- ▁attained +- ▁детей +- ▁profond +- ▁نیز +- ▁Яны +- 但是如果 +- ▁Männern +- ▁поскольку +- 配置 +- gespielt +- ▁eran +- ▁Fur +- ▁algunos +- ▁unua +- ▁отношения +- 菊 +- 姐妹 +- 選手権 +- 护理 +- ▁strive 
+- wright +- 誓 +- ▁halted +- Argent +- ▁поводу +- ▁morir +- ▁Gewinn +- ▁seguir +- ▁requested +- '37' +- ▁Alice +- 我跟你 +- ▁schlimm +- ▁генерал +- 挖 +- χ +- ▁кооператив +- 蓄 +- 自動車 +- ▁merchants +- ▁그래도 +- autor +- ▁Те +- ▁wichtigsten +- 考虑到 +- нә +- ▁Ray +- 埼玉県 +- ź +- 咪 +- ▁удар +- 広がり +- 叫什么 +- 或许 +- ûn +- ▁November +- geworfen +- ধা +- ▁assuming +- ▁Außen +- ▁Debatte +- ▁nhân +- 利亚 +- ▁muslim +- гээ +- Michel +- ▁serait +- 山さん +- ன்று +- bris +- ▁telegram +- 笨 +- 陶 +- عرض +- '198' +- 調べています +- 会发生什么 +- ▁smash +- ▁петр +- 盲 +- ෂ +- ▁вроде +- ▁sólo +- ▁Times +- 信用 +- 술 +- 파 +- 訪問 +- ▁mı +- komst +- ඳ +- ▁александр +- ▁happier +- 毫无 +- ▁Roma +- ▁kalt +- niveau +- қә +- kurikira +- 終わった +- твар +- poro +- аваць +- 你就是 +- ▁deserved +- īja +- '33' +- ¡ +- 陰 +- ▁speculation +- 警察官 +- 大きさ +- 八十 +- visa +- ▁rey +- 十万 +- seks +- 也得 +- 去年の +- ▁Andrew +- 我刚才 +- 这可能是 +- tausch +- 继 +- ▁conspicuous +- ▁remembrance +- gebung +- ▁скажу +- 旅游 +- ▁jedno +- schreiben +- 相处 +- ▁respectful +- ▁plague +- すべての +- 不幸 +- чна +- ென் +- kuta +- ▁vehicles +- stell +- 每周 +- 之处 +- 给您 +- ацион +- ▁Anna +- わけですね +- म् +- ▁tempted +- おととい +- ▁جای +- 而已 +- 赤ちゃん +- covid +- page +- ระ +- ▁fig +- ッツ +- 年前の +- ▁theories +- 究竟 +- − +- wanga +- prost +- ▁pony +- ▁heures +- ▁samuel +- seitig +- 就跟 +- ▁дуу +- どうなる +- かかり +- 整天 +- もう一つ +- 倾向于 +- 溪 +- ▁prevailed +- ▁یاد +- ▁climbing +- пот +- ▁sõ +- führt +- чо +- ▁investigate +- sobola +- jski +- 忧 +- दि +- ▁mike +- ▁Song +- wish +- ▁Tä +- 形状 +- စ +- asyon +- ▁roast +- haired +- ▁vais +- 記者 +- тим +- ▁luke +- ඒ +- ặ +- ▁confederate +- 올 +- ▁masa +- credi +- ەکان +- Work +- udio +- ▁disagree +- 向他 +- 傘 +- kämpft +- ▁довольно +- Смех +- ▁vraag +- drag +- ▁proto +- ▁sûr +- 的建议 +- ▁seeming +- حمل +- 耍 +- ▁whereupon +- ▁carries +- 抵抗 +- の結果 +- 陵 +- ▁Zwischen +- 매 +- 菅 +- 博物馆 +- 六个 +- werken +- すぎる +- ▁darunter +- ▁intervention +- を示しました +- ▁implementation +- 共有 +- cji +- ▁embargo +- 줄 +- ▁negotiations +- ▁torrent +- rilla +- 都已经 +- 更重要 +- 水分 +- డు +- ▁شوند +- 
▁null +- خارج +- ▁usage +- をしていた +- 扶 +- 문 +- 산 +- 列表 +- ▁suited +- улы +- ▁выступ +- それも +- 袖 +- 次は +- 始めました +- ▁approximately +- を続けて +- 这座 +- ێن +- ▁compass +- ций +- ▁quaranta +- ▁tym +- ▁bibli +- مات +- ▁بىر +- klad +- ировал +- 你不要 +- ▁необходимо +- ▁promising +- ▁Meilen +- lege +- loge +- ▁figured +- 申し +- ときは +- ▁equality +- 之类的 +- ▁erhob +- ケーキ +- 認識 +- ▁reconcile +- ▁yabo +- ▁debat +- コード +- haya +- 那我就 +- 惨 +- 昆 +- ▁слишком +- お互い +- 交渉 +- ▁Daher +- ▁plea +- ▁habia +- 会让 +- чны +- 孕 +- 笑了 +- 喜马拉雅 +- 訓練 +- ▁говорили +- ▁места +- 編 +- рма +- 組み +- 就开始 +- त्र +- ▁تخ +- вший +- ▁Grenzen +- 我以为 +- 注册 +- 伝統 +- ரே +- ▁Sendung +- ▁souvent +- 绝望 +- ▁gesicht +- гьы +- かけた +- ▁franz +- 砸 +- ▁ఆఁ +- ▁pauvre +- ▁rejoice +- 本周 +- lamp +- נ +- ▁cuộc +- ▁donald +- 行きます +- 这是一个非常 +- тав +- 上がった +- 就是一个 +- istischen +- ή +- кина +- に行った +- ▁Nick +- నా +- ▁clearing +- ▁tinha +- 에는 +- タル +- 乃 +- zeichnung +- ▁товарищество +- 桜 +- 卒業 +- يه +- ▁luz +- 왔 +- ▁qo +- ladı +- baum +- 完美的 +- 疗 +- いるという +- ▁verdient +- 오 +- ▁слыш +- ище +- ▁tact +- middel +- 迎えた +- 玲 +- お二人 +- ▁curtains +- alimenta +- ▁tooth +- のですが +- ▁foresee +- Risas +- àtic +- コメント +- 产业 +- ▁Lager +- gewalt +- んでしょう +- حص +- ▁fühle +- ▁respected +- 宣传 +- 行吗 +- たこと +- चा +- bürger +- ▁alchemist +- ▁Jones +- чака +- ▁шест +- ľ +- 俱乐部 +- ▁денег +- 繰り +- karte +- 黎 +- 比你 +- ▁shrill +- піс +- ▁accordance +- තා +- ▁بشكل +- 营销 +- ▁halo +- ▁washing +- ▁heath +- ワーク +- ▁mature +- ▁Wohnung +- ▁representatives +- information +- ھا +- material +- ▁rosy +- ▁regulate +- ▁cinquanta +- ▁veteran +- dov +- 只需要 +- ▁hull +- ▁但是他们 +- 跟她 +- 产生了 +- 時には +- 一颗 +- ▁riot +- 这并不 +- ▁pagan +- ▁veux +- ▁evolu +- luck +- ▁será +- ▁übrig +- ▁resta +- 负担 +- ▁başka +- ▁пример +- ▁parle +- ন্ত +- estro +- ▁Regeln +- vió +- 尿 +- ුණ +- 素晴らしい +- ▁தொ +- pí +- phra +- ▁efter +- 糸 +- ု +- 楽しめる +- ▁persian +- ▁Fil +- ▁возник +- 了下来 +- ▁willkommen +- 饱 +- 糟 +- 畑 +- ▁gigantic +- ▁buffalo +- ▁meditation +- ユー +- ▁solemnly +- よろしくお願いします +- ▁explica +- 溜 +- ਦ 
+- ▁wünschte +- 備え +- lige +- ubwi +- виду +- 我不认为 +- очка +- خان +- ancien +- ▁supposing +- ▁gloves +- ▁lahko +- ▁работу +- 访 +- sional +- 所以我想 +- ከ +- ገ +- และ +- シリーズ +- てくれた +- ▁смог +- ▁Lied +- の方に +- ▁confront +- ▁சில +- ▁parler +- ▁agnes +- 坎 +- 撕 +- ▁medizinische +- 少数 +- Man +- 正义 +- 微博 +- ıcı +- ▁jungle +- طب +- ▁vicar +- ▁Grad +- ▁thereof +- 像你 +- lach +- '38' +- ▁Verständnis +- ▁энерг +- ギャ +- ▁blog +- 一个新的 +- ▁samo +- IP +- ▁junior +- ▁kümmern +- ▁commis +- burger +- 機会 +- ▁хол +- ブリ +- ▁توان +- อง +- 沼 +- ▁calendar +- ▁angefangen +- ▁கொ +- 这时 +- 敏感 +- ▁tlie +- というのを +- ▁fürchte +- excursió +- ራ +- ツアー +- 天堂 +- ▁oficial +- 達成 +- bouw +- ▁forbidden +- 全ての +- 価値 +- ▁bruce +- ジー +- 胶 +- ▁Lernen +- 我有一个 +- 野党 +- обр +- あさって +- いるのか +- ▁nützlich +- ▁partial +- ▁tempt +- يون +- シュート +- ▁dachten +- 平安 +- ▁ruled +- ▁integrated +- 新型コロナの +- ▁grind +- vina +- ▁сильно +- ▁месца +- んだから +- 可以用 +- ▁dernier +- сет +- cato +- ▁голову +- raient +- ▁ayuda +- くなった +- 開幕 +- baye +- ▁гри +- 所以它 +- 及时 +- プラス +- ნი +- 驾 +- ▁mortgage +- ▁awesome +- 怀孕 +- ▁нават +- дела +- ▁float +- ▁Revolution +- 一看 +- 違います +- ▁садовое +- 槽 +- ▁invasion +- ▁особо +- ▁splash +- 相手の +- عاد +- ▁dla +- 国連 +- ▁contend +- ▁bubble +- ਹ +- 只会 +- ▁tính +- ▁martha +- ▁چرا +- 你不知道 +- risti +- ▁хто +- の天気 +- paka +- ▁المت +- 建て +- коммерческое +- ▁tersebut +- 細胞 +- ▁snapped +- حدود +- 进化 +- natured +- 围绕 +- toren +- 臓 +- 稼 +- 我们认为 +- 新型コロナウイルスの +- ▁adequate +- 监控 +- ▁precipita +- ▁lorsqu +- venue +- 股票 +- ったこと +- ▁inspire +- ▁verließ +- ▁Га +- ▁sensor +- 空中 +- ठ +- ਕ +- ▁Aspekt +- ▁intens +- শা +- 見つかった +- ▁remedy +- みたいに +- ったのは +- ▁greatness +- ▁comunica +- гул +- ఆ +- かさ +- っていうか +- 楽しんで +- 立法 +- ▁müsste +- licit +- BC +- ▁دهد +- ▁Ansicht +- ▁sozusagen +- ▁awhile +- த்துக் +- ▁Apple +- ▁obliga +- ▁dolor +- іль +- ுள்ளது +- ▁Vari +- 批准 +- ▁components +- ▁Latin +- ▁rebellion +- 预算 +- ▁genial +- ক্ত +- 爆発 +- ▁Mannes +- tawa +- ▁சொல்ல +- 炒 +- ▁thursday +- මු +- гур +- ▁gasp +- ▁focusing +- ▁June +- 拿出 +- 
▁Bald +- ▁естественно +- ▁настоящ +- 棍 +- 記憶 +- ▁chariot +- ▁comrade +- ▁лицо +- ▁rachel +- メント +- 九点 +- ▁altri +- 有更多的 +- ▁nuit +- 㗎 +- エル +- ▁resolute +- ensemble +- চি +- 就得 +- 的项目 +- ருக்கு +- ▁nannte +- ̣ +- ▁Verantwortung +- 所以这是 +- 将会 +- ▁lemon +- gänge +- 涉及 +- ョ +- kili +- eṭṭ +- ▁Antrag +- ▁blanche +- されていました +- ▁Gru +- 戻って +- ▁تنها +- ことによって +- ▁Town +- ▁leider +- mali +- ▁yep +- 你妈 +- 塗 +- 梅雨 +- ▁добро +- 的速度 +- ▁которым +- 芸人 +- 選手たち +- kord +- bald +- ▁Bol +- plant +- agh +- 伸ばし +- кала +- 我们不能 +- 欣 +- ▁preach +- ▁breit +- ▁இருக்க +- ▁செய்ய +- ▁pilgrim +- ▁voix +- ▁alguns +- ▁veya +- 请问 +- заў +- どこか +- ического +- ती +- rauch +- 的概念 +- PR +- щие +- ranye +- ▁één +- ▁fé +- 政治家 +- drop +- 取消 +- とみられる +- ▁accomplishment +- ▁vegeta +- ▁camin +- アプリ +- ▁assumptions +- mütig +- ailleurs +- ▁Во +- wadde +- prozess +- ▁resulting +- жды +- こん +- открыт +- асці +- ފަ +- ensor +- zaam +- ▁actively +- ksa +- ▁Ці +- juk +- эш +- körper +- インタビュー +- 来来来 +- yama +- чным +- issait +- ▁uncommon +- ▁despatch +- 但他们 +- ▁klas +- ▁Valley +- ▁часто +- ▁tế +- 奪 +- ▁Open +- န +- ▁façon +- なんでしょうか +- ▁gag +- ▁Start +- ▁отдел +- 说法 +- ▁добры +- いこう +- grä +- ▁louise +- ▁gewisser +- reiche +- 崩れ +- ▁fulfilled +- 措置 +- 这话 +- ▁gedaan +- いたい +- ▁wail +- diye +- 近づ +- ▁тобой +- ▁sheriff +- 限定 +- 校长 +- 運営 +- ▁gyfer +- adju +- そうですよね +- 柯 +- ▁occurrence +- ട +- 这一次 +- ▁folgte +- 过得 +- 的专辑 +- 夜晚 +- 事務所 +- 惜 +- ▁aunque +- 等到 +- ボールを +- 夏の +- ▁falta +- ▁frühen +- ▁daraus +- ▁saith +- ▁Mira +- ▁sanft +- 尴尬 +- んですかね +- ▁religiöse +- ▁nennt +- дель +- ▁شک +- ▁childish +- ▁мнение +- ▁stalk +- ▁خوش +- Һ +- ▁bởi +- 見込みです +- ▁kişi +- ▁acaba +- ▁Live +- ruff +- ▁approve +- ေ +- 賛 +- züge +- রু +- 工夫 +- 铜 +- ▁همچنین +- 형 +- ▁reflecting +- 入れた +- 也不知道 +- 醒来 +- related +- аас +- ▁filter +- ▁sunny +- ▁cuenta +- 蛮 +- 熬 +- ▁catherine +- ▁respekt +- 尽快 +- ▁vertraut +- ▁cautious +- 겠다 +- kami +- ▁États +- ▁warrior +- ▁circular +- ▁orang +- チェ +- ▁specially +- قان +- ▁doom +- 裔 +- ▁kubi +- ▁raid +- わず 
+- 你又 +- ▁erklärte +- われた +- hurst +- ▁strait +- 弄清楚 +- ▁класс +- 它可以 +- ありますか +- 拐 +- 체 +- バイデン大統領 +- 揺れ +- gibt +- reise +- いただいた +- ▁dump +- ▁meek +- ▁hayi +- proof +- fassung +- ▁Stock +- 坡 +- 煤 +- 遗憾 +- ▁Experiment +- トマト +- 协会 +- ▁contribu +- 东北 +- ▁aynı +- sız +- ▁geschaffen +- ▁secondary +- ▁pitt +- auto +- гуля +- mies +- lohn +- 離れた +- сот +- 所有人都 +- スペイン +- 維持 +- ▁Tema +- ▁আমি +- 日まで +- 銀行 +- 整理 +- 原子 +- 殿下 +- вернул +- ▁goede +- ▁relates +- 近くの +- ▁клуб +- ▁douglas +- 仪式 +- ▁paz +- ▁implica +- কি +- korera +- 试试 +- ▁translated +- ▁тое +- 单独 +- ியது +- ▁algorithm +- 몇 +- ▁случай +- ▁anterior +- 俳優 +- ▁comic +- ▁candidates +- もうちょっと +- 一緒 +- ość +- ▁nước +- ▁thay +- ▁policeman +- 足を +- 測 +- ▁combine +- ார்கள் +- ▁Pour +- ▁gaining +- 更容易 +- ▁bloß +- ▁Punkte +- જ +- ▁chocolate +- ည +- 労働 +- それぞれの +- ▁robbed +- ▁fourteenth +- 专辑 +- 物価 +- 北方 +- یده +- ▁commanding +- বু +- ▁joyous +- ▁drehte +- ▁товарищ +- ▁eingesetzt +- ▁sullen +- 五千 +- 姑 +- ▁девушк +- 我以前 +- ▁elderly +- を取って +- けさ +- 日目 +- 织 +- ▁elektr +- ▁hearty +- ▁trigger +- トル +- ▁waarom +- ▁zač +- หล +- ▁schaute +- ▁starve +- erade +- ▁yapma +- ▁precise +- ▁kinderen +- ▁حول +- rolle +- ▁productive +- мова +- ▁bằng +- 酬 +- ▁შე +- ulin +- ▁regulations +- 直後 +- ▁насколько +- 祝福 +- 结论 +- ▁Kanada +- ▁philosophical +- ▁slash +- dynami +- 他是一个 +- くなります +- ▁shelf +- '95' +- ますけど +- ▁deprived +- ▁stray +- 出门 +- 小屋 +- そうそう +- вался +- ▁mujer +- ու +- 泄 +- ▁моей +- 窃 +- ▁überrascht +- ▁diğer +- ▁حتى +- ▁loin +- 雨雲が +- ▁importantly +- ▁fiery +- ▁contradict +- ▁Яна +- ▁balloon +- ając +- ▁einfacher +- ▁bestowed +- 島の +- город +- ▁moyen +- 浪漫 +- ▁தலை +- ▁fortress +- ▁lebih +- schild +- nimi +- coli +- というような +- ▁sprak +- ▁fright +- ▁benutzen +- 吞 +- ▁Schwierigkeiten +- ▁estão +- ▁vide +- ▁frederick +- 延長 +- 的城市 +- ināt +- 貌 +- ▁Hey +- 雰囲気 +- ▁aggr +- ▁wiederholt +- າຍ +- ▁musket +- ▁swan +- TO +- ▁Gang +- cala +- ێک +- look +- ▁الْ +- 社員 +- ▁wales +- ▁problème +- こちらも +- zusehen +- ▁پو +- sichtig +- ▁electronic +- 
гээр +- 把你的 +- ▁server +- ▁exertion +- 屏 +- ല +- ▁ejemplo +- ▁invariably +- fugi +- 外部 +- gruppe +- mord +- 為 +- ந்தி +- ▁señora +- двига +- ▁exert +- 諸 +- ▁كبير +- ▁feedback +- ▁Terra +- 大切な +- ющий +- そこまで +- чар +- ▁Var +- 食べた +- ▁glancing +- 薪 +- 还有什么 +- ▁compact +- 审查 +- 原さん +- ▁хоть +- ▁أحد +- mén +- 絶対に +- ▁merge +- ってきて +- ▁swallowed +- ▁Wachstum +- ▁Technik +- مدينة +- ▁fossil +- ▁organiza +- 把这些 +- 引っ +- を見つけ +- 阁 +- てほしい +- нө +- ▁impart +- いますね +- ▁numero +- あふれ +- 立場 +- kutu +- ▁correspondence +- 增加了 +- ▁ragged +- А +- ▁hindi +- ▁dyn +- ▁successive +- がこちら +- ▁verdammt +- 严肃 +- ▁barren +- ▁prairie +- లు +- ▁Ora +- ▁понять +- ▁vamos +- ▁conventional +- に参加 +- ▁vegetables +- ▁стали +- ▁تغییر +- 希腊 +- ▁kelkaj +- 算了 +- '48' +- ацыі +- ▁kitty +- 踊 +- ▁Wahrscheinlich +- meza +- ▁swore +- ▁cheerfully +- zego +- ▁storage +- تك +- ▁bezahlt +- muz +- 五月 +- jol +- дей +- oğu +- icamente +- ▁hinauf +- син +- ▁prova +- ▁letzter +- ▁temperament +- ▁rencontre +- ▁influenced +- ラーメン +- ▁некалькі +- ▁parlour +- ▁Mariya +- fläche +- ▁Meister +- 提出了 +- を求め +- 有意思 +- няў +- ▁polar +- ▁Verwendung +- ▁стен +- ەم +- 的作品 +- ▁verstand +- 所以如果你 +- ▁mieux +- ▁요즘 +- ▁diversity +- бак +- 恐れ +- 案例 +- 洲 +- ▁tamam +- ▁salary +- ▁возможность +- 它将 +- kret +- 氧 +- 龍 +- ▁industries +- chá +- ▁Vier +- ▁swinging +- ▁bacon +- пись +- ▁edith +- 诶 +- ັດ +- ▁знаком +- фу +- ою +- ừng +- ▁impress +- 立って +- ões +- хад +- 锐 +- ▁resemblance +- 罢 +- 拯救 +- 我也是 +- ▁tightly +- けがを +- лева +- 货币 +- ▁beautifully +- యి +- ▁Histori +- ▁Harry +- 訴え +- 集団 +- даль +- 短信 +- 可以通过 +- лаш +- 苗 +- ▁summary +- 프 +- ▁первых +- 企画 +- oxy +- っつ +- ▁celebra +- 准备好了 +- ▁обрат +- 大胆 +- tuig +- θ +- ▁mabel +- бла +- метр +- 50% +- ▁fugitive +- ▁babe +- だよね +- 名单 +- ▁haunted +- 做到了 +- lerde +- white +- ▁technische +- 意味で +- 脂肪 +- ▁dissolve +- zze +- ▁일단 +- ▁loath +- ▁intensity +- 提案 +- 提交 +- ▁steckt +- 耶稣 +- ▁orbit +- ▁русски +- ▁fiercely +- 年後 +- 只是一个 +- Ex +- ▁sakj +- 查看 +- ▁آر +- ▁Ost +- ▁stump +- ▁enthalten +- 
▁bitterness +- 会合 +- 他又 +- ▁hesitate +- 神奈川県 +- 键 +- كار +- 鹰 +- ▁درباره +- жиг +- ▁cabeza +- lada +- ▁bidding +- тыш +- ▁cautiously +- 主播 +- Gelächter +- ▁erlaubt +- ▁reconciliation +- 崩溃 +- وارد +- pion +- ▁fapt +- 協議 +- ▁gemeinsame +- 키 +- ▁دارید +- ▁Qual +- 東日本 +- 三千 +- ▁میں +- ▁venus +- ▁undertaking +- 所以我认为 +- 克拉 +- ▁câu +- われる +- гэр +- aurait +- ▁никак +- かわい +- 别说 +- hilfe +- ▁writ +- beug +- ▁sombre +- ptic +- 如果有 +- NG +- limi +- 的事实 +- ▁cảm +- rott +- ▁район +- ▁desolate +- ▁picturesque +- বো +- 晕 +- ذهب +- ることができる +- ▁weinig +- ▁siya +- ▁stain +- 惑 +- ▁pourquoi +- ▁строительный +- ▁doğru +- ▁Institution +- 送给 +- ▁Indien +- 干啥 +- design +- ゆっくり +- 近い +- ▁Sicher +- ▁раньше +- 용 +- ▁effektiv +- NATO +- ▁incentive +- ▁assisted +- 什么东西 +- ▁stirring +- ▁corresponding +- हि +- 复制 +- 窝 +- ▁Füße +- ▁tất +- ències +- ▁Ati +- 钥匙 +- ▁mysteries +- ▁bietet +- ▁gegenseitig +- ▁எழு +- ▁tej +- 庆祝 +- เล +- ▁நகர +- るもの +- மீ +- نز +- sinde +- サル +- になってる +- 上空 +- 言われた +- ▁Julia +- ۋا +- ▁grundlegende +- ▁comte +- pida +- ▁speedily +- 我们希望 +- respect +- 普通の +- ▁thoughtfully +- ▁Är +- 宜 +- sorge +- ▁betrifft +- ▁dụng +- ար +- 你如何 +- 地址 +- ▁يكون +- лета +- 都说 +- 赏 +- 強調 +- ▁Sab +- лос +- tijd +- ▁bezahlen +- ▁Serie +- イタリア +- ▁flexible +- ▁isolated +- ▁angrily +- ゲスト +- 被认为是 +- 人工 +- tauchen +- ▁точки +- 張り +- ▁Right +- borg +- ligi +- about +- 匠 +- さんから +- ▁passive +- ▁delayed +- かす +- ダン +- জে +- 想着 +- ▁vacant +- 宴 +- ▁congregation +- 分かり +- خور +- ▁juice +- ستخدم +- るべき +- 子どもの +- ▁poble +- 三振 +- けれど +- 緩和 +- ▁pigeon +- сны +- ▁большое +- ▁Westen +- әһе +- сни +- ▁ensuite +- ▁agencies +- ▁remorse +- ▁смотрите +- الات +- 工事 +- 터 +- ▁отвеча +- igkeiten +- ▁Universitat +- bwiye +- という感じ +- 통 +- ▁identific +- 男性は +- 冻 +- ▁quá +- 日曜日 +- ▁সেই +- ミン +- ído +- ▁parcel +- ▁êtes +- ▁ringing +- 癌 +- ▁baixa +- ▁благо +- するか +- 组合 +- ▁predomin +- reisen +- hnung +- ▁கரு +- 贪 +- ▁Häuser +- ▁nowadays +- ▁Victoria +- ثلاث +- imecres +- ▁ошиб +- 保健 +- ▁goose +- ▁nieuwe +- ▁consented +- 匪 
+- ▁geloof +- ▁Würde +- ▁pup +- 集まって +- ▁parade +- これらの +- 周围的 +- 美好的 +- bildung +- ▁plank +- ▁blink +- ▁behalten +- ▁сказали +- ▁каких +- ▁දෙ +- 佑 +- 呂 +- ▁bestimmt +- ▁بده +- ţa +- 但她 +- 寿 +- ▁ўз +- iera +- ▁shrewd +- ▁рядом +- меж +- 毎年 +- ▁persistent +- ▁addict +- ▁mogelijk +- ▁Christian +- ▁disconnect +- ▁dadurch +- gisha +- ▁politicians +- ▁audio +- 让人们 +- ▁Harvard +- ▁Niemand +- fí +- ড়া +- ▁digest +- 同一个 +- 契約 +- ▁corona +- kku +- 広い +- 以色列 +- ▁Kreuz +- 陪伴 +- ゃ +- ▁Gü +- ▁uitge +- ▁seasonal +- ▁sanction +- Th +- යා +- vär +- 幻想 +- ▁Kate +- 麦克 +- 榜 +- ▁exchanged +- ▁verfolgt +- メジャー +- ▁jardin +- ▁Kritik +- ▁Islam +- 心态 +- 侵 +- ▁console +- ▁الان +- ▁fulfil +- 多数 +- fiel +- ▁voraus +- conscious +- sohn +- 부터 +- 時半 +- ▁canon +- ▁zahlreiche +- 盒子 +- মু +- länge +- pila +- ▁линия +- 偶然 +- ▁whatsoever +- 转变 +- ▁eleventh +- 暗示 +- 心理学 +- ▁albert +- 表现出 +- 蜜 +- ▁pennsylvania +- 새 +- らん +- たくない +- tev +- плы +- 扑 +- ▁taun +- 手中 +- ▁onward +- імі +- 空間 +- ▁thời +- ▁евро +- ▁tuesday +- ▁scotch +- ▁смысле +- しているということです +- ▁redeem +- čni +- 評 +- лык +- ▁Divendres +- ประ +- ▁tested +- ▁passar +- 偶尔 +- ▁Group +- ▁знаем +- 二十四 +- 想定 +- ▁Satz +- 淘 +- 摧毁 +- ▁dapat +- ▁helmet +- ▁historic +- 特定的 +- ▁youthful +- ▁relieve +- ▁chris +- ▁argued +- 十几 +- ▁eerst +- ▁документ +- ▁poate +- мор +- 退休 +- 你有没有 +- 帰り +- ▁මෙ +- হু +- 鎖 +- 長く +- ▁deliberate +- казаць +- 違 +- ▁shed +- ographie +- 舒 +- ▁jaun +- ▁wanneer +- ▁ببین +- 燃烧 +- taking +- ▁Schau +- 发现自己 +- 出てきた +- comm +- وجه +- ▁contro +- ▁angenehm +- চ্ছ +- ▁awaiting +- عَ +- ▁watu +- ▁overwhelming +- 宠 +- ▁peuvent +- ▁erlebt +- が好き +- 仕事を +- 体系 +- vidi +- ▁relating +- ▁sweetness +- ▁rhai +- ▁круг +- カリ +- ▁هستم +- ▁disput +- 発達 +- schnitt +- ▁Bü +- ▁recognise +- ▁tracta +- 只是为了 +- 通過 +- ではなくて +- 始まって +- ▁discharge +- 不出来 +- ▁inventor +- ▁bog +- ▁français +- 贤 +- 锻炼 +- びっくり +- аваны +- 公正 +- ゅ +- ▁страна +- 安装 +- ▁kwenye +- ніка +- urage +- 在网上 +- ▁drown +- 年ぶりの +- terri +- zellen +- ת +- セル +- いかがですか +- 授業 +- ▁morgan +- ▁miller 
+- ほうが +- ▁neuer +- 论文 +- ▁wunder +- ▁Fach +- ▁skies +- 标题 +- ▁Sitz +- zional +- خواهی +- 見てみましょう +- ▁transformed +- 没办法 +- ▁melted +- ▁encontrar +- ▁genre +- 擅长 +- ▁Middle +- 说实话 +- ática +- ności +- グル +- ▁cultivated +- ▁pioneer +- 基準 +- 生态 +- ▁இல்ல +- ▁neden +- スープ +- ▁Questo +- ▁pry +- 食感 +- ▁Wall +- ▁regulation +- 锋 +- 我认为这是 +- 近くに +- gemeen +- ▁hospitality +- ▁erected +- ▁Have +- ▁interaction +- greif +- ಂ +- ▁tissue +- ▁таго +- ▁элек +- 不要再 +- ▁Brun +- ▁gill +- っちゃって +- what +- 変え +- ▁adapted +- 铃 +- マンション +- جار +- ▁frighten +- ▁Gw +- ▁Top +- ▁juist +- ▁Jeff +- ▁wrth +- ▁frisch +- 好听 +- ▁penalty +- ▁څه +- ▁marion +- ▁Front +- ▁Arch +- ▁dicho +- ော +- ▁optimistic +- ▁Zusammenhang +- әл +- 铁路 +- ▁accounting +- ▁vocal +- оби +- ▁representing +- ▁cancel +- ĕ +- 转移 +- రి +- 七年 +- ▁kubera +- 饮食 +- ▁unfold +- 无聊 +- ▁Stephen +- ▁bestow +- ▁регион +- ▁terrific +- 监管 +- 場所に +- وک +- ▁matin +- ▁conveyed +- ▁склада +- ペース +- ▁adhere +- ついに +- And +- ▁underground +- tuvi +- ▁apenas +- ▁allowance +- ▁противо +- Rires +- ▁нашего +- ▁نیاز +- ▁begon +- ▁harold +- ことから +- 定期 +- 返し +- ベース +- ▁кост +- 测量 +- ▁deceive +- ダウン +- 部署 +- 应对 +- 找个 +- 命名 +- ▁bucket +- ▁traitor +- ඇ +- ʻ +- ވަ +- gá +- ▁அமை +- ▁Zweifel +- овым +- 殺人 +- 落下 +- ▁Tell +- ▁стало +- 跪 +- ▁Lieblings +- ▁earthquake +- రు +- рик +- 処 +- ▁pavement +- 我真的很 +- 離れ +- ▁mora +- 開いて +- 十个 +- ww +- indre +- eerde +- 次に +- 否定 +- ▁violin +- ▁numa +- ドリ +- 加州 +- ましたけど +- ▁моему +- vēr +- ▁enfin +- いける +- ▁plunder +- grado +- ジャン +- ▁podía +- били +- ▁mourn +- ▁circula +- ▁screamed +- ▁eternity +- ▁privi +- ほう +- ▁되는 +- 正面 +- ▁üle +- ombra +- ▁வரு +- pane +- åg +- ▁entlang +- 团长 +- 驻 +- マル +- 残る +- ných +- 思いを +- ▁Jackson +- ҭ +- も多い +- ▁එක +- を行い +- achtig +- 的证据 +- ▁geschehen +- 慮 +- ▁gentil +- ▁найти +- わし +- 分裂 +- impi +- 悦 +- ▁định +- 弁護士 +- 陽性 +- zubringen +- 早点 +- ▁autonom +- 手術 +- 種目 +- 我说的 +- ޮ +- 审判 +- 年纪 +- ▁其实我 +- ▁gris +- ▁показал +- ▁tourist +- быт +- 机场 +- ▁interrupt +- 守り +- 筋肉 +- ▁manière +- ユニ +- 作戦 +- 
▁proced +- 響 +- ▁morality +- ▁Wunder +- igeze +- ▁awfully +- ▁gesti +- ▁Allerdings +- ▁학교 +- ▁dwarf +- 上市 +- ▁futbol +- кен +- ▁construir +- 疯 +- 大雪 +- ▁physics +- ▁beteiligt +- オーストラリア +- ▁hearth +- эння +- ▁cavern +- бат +- евский +- 路线 +- іі +- ▁Donc +- ና +- ▁appreciation +- ▁trousers +- 基础设施 +- ▁gewissen +- 城镇 +- 你看到 +- ▁организаци +- 悬 +- 抹 +- ▁hinzufügen +- ▁brillant +- ▁Donald +- ご飯 +- ▁dominant +- рах +- 伙计 +- meɣ +- ▁benim +- 鉴 +- выше +- 郁 +- 捷 +- クリスマス +- ▁ມັນ +- ▁اسم +- その中で +- 看不见 +- ывают +- らせて +- 饮 +- йын +- かぶ +- 掌声 +- ори +- ▁puzzle +- ្រ +- 队伍 +- ▁carved +- ది +- ▁admission +- слуш +- 鼻子 +- ouvri +- pta +- ▁yani +- ▁extending +- ப்படுகிறது +- தில் +- 官方 +- шчы +- ▁пойм +- 皆さんに +- रे +- ▁minim +- ▁forgetting +- rseits +- طي +- 日々 +- unterschiedliche +- teko +- bewegung +- ▁coffin +- ▁Information +- 挥 +- いきたいと思います +- ▁arribar +- kubernetes +- ▁nachzudenken +- '55' +- 狙い +- '3000' +- ▁ultra +- лым +- 業者 +- ▁تحت +- ▁menace +- ▁Instrument +- いかない +- ▁Boy +- گا +- ▁Gehen +- ەیە +- ▁capability +- 양 +- dział +- වේ +- 经典 +- の仕事 +- abri +- 都不知道 +- 的家人 +- луг +- 游泳 +- ▁owl +- 仍然是 +- ▁attachment +- 今から +- ▁Apo +- ▁наступ +- Oh +- 所以当 +- ▁тихо +- ▁casting +- 然后在 +- 常に +- ▁laatste +- weza +- тир +- マス +- ▁bộ +- ▁luther +- ▁عام +- ▁скажем +- ▁меньше +- 溶 +- ▁делает +- ▁Hinsicht +- ▁ambient +- ▁бя +- 一号 +- illion +- ▁Zweiten +- ajja +- ▁подобн +- ▁sylvia +- 縁 +- ごめん +- ▁trở +- ▁crude +- すみません +- fanya +- 灾难 +- นี้ +- ▁dritten +- ▁transferred +- ▁freight +- ▁chemin +- ▁milieu +- 他认为 +- を発表 +- ▁cruise +- ▁deepest +- нап +- леш +- ▁Ecke +- ▁Kreis +- 要不要 +- öß +- ▁Minute +- ▁процент +- 广州 +- ▁sublime +- ▁получается +- ▁Brad +- 然后是 +- ▁allgemein +- 农村 +- ▁болно +- ▁Eigenschaft +- 比較 +- ▁grandi +- drängt +- ۋ +- ▁அதிக +- ▁Cri +- 場所で +- سك +- ▁çek +- щий +- 纽 +- 冯 +- ▁живот +- ▁مې +- 便利 +- krimin +- 当然是 +- ▁kicked +- MS +- 友好 +- ▁Krankenhaus +- ▁Trotz +- じゃないか +- 成果 +- 剩下的 +- ▁Vision +- ▁verdade +- ▁pê +- ▁Түүний +- ▁پیدا +- '=' +- ▁пусть +- ▁Privat +- マネ +- ▁Quar +- 
物体 +- ▁ນີ້ +- ませんか +- 单词 +- 触れ +- 山上 +- ママ +- koloni +- bogen +- ▁đổi +- ▁Tanzania +- 在我看来 +- 太陽 +- нец +- 生长 +- ▁Dev +- ▁scrutin +- نَ +- ▁девятьсот +- llah +- の裏 +- trum +- に加え +- ▁fulfill +- ▁regardless +- ▁surpass +- ▁வந்த +- ▁seixanta +- ▁inflation +- ▁augenblick +- орд +- ▁infinitely +- ▁ufite +- 时光 +- ▁Lauf +- 精力 +- ēr +- が確認され +- மெ +- හා +- ▁பேச +- schmerz +- unterricht +- reɣ +- 创造了 +- ▁veure +- ▁sechzig +- steller +- ▁retour +- ▁untersucht +- に入った +- 方がいい +- ▁constitutional +- 号码 +- ▁educational +- มัน +- ▁Those +- ▁Quando +- ▁herbert +- 以便 +- ▁années +- ▁acres +- 帆 +- ▁maître +- ▁langer +- 罗斯 +- ▁syria +- office +- ▁nebo +- ▁контрол +- ▁everyday +- ▁ähnliche +- ▁diferents +- නවා +- 密码 +- ▁Blue +- ▁broadcast +- žu +- дают +- أن +- pik +- 月亮 +- stimmung +- ▁Krä +- ▁satellite +- ▁weltweit +- ▁figli +- ▁tailor +- くれる +- 腐 +- ሽ +- の写真 +- ▁كه +- 変更 +- ▁negroes +- ▁همین +- ▁Acht +- strom +- مەن +- 勾 +- 妨 +- 一夜 +- 働いて +- ▁lachte +- ▁persisted +- perto +- larga +- படி +- ▁Wirkung +- ▁gezicht +- ▁فرا +- geladen +- овые +- vana +- ▁bemerkte +- ▁beliebt +- ▁гаражно +- 싸 +- ないといけない +- ▁provoca +- ▁Ausdruck +- 两者 +- ▁canvi +- ▁complexion +- гран +- ▁vedere +- DA +- ӡ +- häuser +- とかも +- 留下来 +- ▁چون +- Atlanti +- schwer +- 姿を +- 死去 +- ▁bewildered +- ▁refrain +- ▁verfügbar +- ▁dije +- ▁praying +- ▁fuera +- 代价 +- ▁Selle +- হে +- ▁hyd +- 全身 +- يين +- ▁немного +- ▁أخرى +- ▁Qualität +- 护士 +- 年齢 +- ▁tension +- Prince +- ième +- шет +- 谦 +- ẩ +- ucu +- ▁ними +- ছা +- ▁Ky +- 知道他 +- 蔡 +- 拦 +- instagram +- ovitch +- ▁orphan +- телей +- ▁hablar +- ▁subsist +- ▁grape +- アナ +- давать +- фан +- ▁välja +- を使い +- 汽 +- 另一种 +- нікі +- 排除 +- ▁scientist +- ভি +- лыг +- ▁tribute +- таш +- ▁lind +- 低気圧 +- ▁ohio +- ▁universit +- 简直 +- zaj +- ▁Letzte +- țe +- abaye +- ▁nke +- 週末 +- 仏 +- 主持人 +- ▁먹고 +- ▁recherch +- ▁wohin +- ▁pulse +- ▁lazy +- ▁impatiently +- ▁invalid +- ىدى +- を巡って +- 富有 +- ▁identical +- 霸 +- 刊 +- responsabil +- 違反 +- ▁futuro +- ▁داریم +- ителей +- ▁Zeitung +- سون +- 置き +- 
▁pete +- 基督教 +- щения +- 時代の +- 上げた +- ▁военкомат +- 赤い +- 安全保障 +- ▁descri +- ▁Cle +- ▁Kapitän +- ▁regelmäßig +- ▁verfolgen +- 脅 +- 高騰 +- ▁Einrichtung +- itarian +- 大规模 +- 是个好 +- âr +- ▁Brust +- ▁herausfinden +- ▁privacy +- ▁важны +- ▁Out +- mitglied +- ▁dedicat +- 廃 +- ▁signature +- ▁பகுதி +- ▁arriva +- ▁quiero +- この時間 +- abba +- ▁Nehmen +- ▁enlarge +- ாங்க +- ▁Lady +- 趁 +- ▁greece +- ▁surgery +- ▁dunkel +- dokument +- ▁quiere +- ってしまう +- 雨が降り +- ▁hause +- ▁discretion +- 的风险 +- ほんとに +- ▁Universum +- ▁Foo +- kê +- ▁Pala +- ▁exclusive +- крат +- ▁assumption +- ▁sev +- を持ち +- ▁exhibited +- ▁такую +- ▁hỏi +- ▁produziert +- ▁jersey +- ▁Schönheit +- 额外的 +- ▁Vorstand +- ▁sotto +- ▁eagerness +- funa +- ▁unseen +- ▁jumping +- ▁legitimate +- ▁physically +- ▁hafi +- ▁poetic +- iyordu +- 唱的歌 +- 崇 +- ▁rubbed +- ▁humility +- 启动 +- ▁accessible +- ▁hoarse +- ▁nơi +- ▁bois +- үй +- حب +- cari +- ▁Fleisch +- ▁Для +- singa +- 犹豫 +- ▁Bedingungen +- ▁englische +- ▁sneer +- を挙げ +- ▁sentimental +- 运营 +- ▁jaroj +- 覧 +- 我一直在 +- 潘 +- ▁ingredient +- 真ん中 +- ▁Alexander +- ▁brigade +- ▁oxygen +- 相同 +- 过度 +- 校园 +- 何だ +- ключа +- شە +- تُ +- ▁Rang +- 远离 +- 相次いで +- ▁byinshi +- ▁tätig +- umugore +- ▁dictate +- ▁nhau +- ția +- 船长 +- ▁năng +- 侠 +- 誤 +- となっている +- 但是如果你 +- ▁hyper +- 该怎么办 +- 大学生 +- ればいい +- 这很 +- тоў +- ság +- ▁fifteenth +- ▁đâu +- 길 +- ▁известно +- ▁glacier +- einig +- сво +- 那一刻 +- に乗って +- ẻ +- 罐 +- ▁arriving +- ▁který +- ▁закры +- べる +- 革 +- ▁retirement +- ▁marching +- ှ +- ▁annoyed +- ▁üblich +- yerek +- говарива +- kampf +- දා +- 提议 +- ▁Western +- なぁ +- ▁kubona +- ▁کجا +- gezi +- 疏 +- 进展 +- ▁города +- ▁hurriedly +- ▁kiuj +- 年ぶりに +- ▁appearing +- NA +- ▁elfu +- 鸡蛋 +- 涛 +- ▁bernard +- ▁manual +- бед +- ▁banc +- 美しい +- schneiden +- لعب +- ▁Maßnahmen +- ▁superstition +- speicher +- ▁embark +- 観客 +- koko +- ▁manipula +- お話を +- ▁joyful +- হি +- ▁incessant +- ▁courtesy +- ҳа +- ▁fain +- imbi +- '[' +- 为他们 +- ▁determina +- 本部 +- 民間 +- もらいたい +- 裸 +- أت +- 做了什么 +- лыш +- kiye +- bleib +- 
▁şeyler +- 売れ +- ▁Germany +- 我很高兴 +- ありますよね +- 呵 +- ▁вокруг +- 专注于 +- ▁sequence +- 予算 +- ▁indirect +- ▁bilden +- lässig +- 合适 +- ຽ +- ჩ +- You +- 所属 +- ▁tries +- ▁speeches +- カット +- ▁trivial +- ▁posterior +- 見通し +- ▁rhan +- оруж +- 去过 +- ҩ +- ▁monstrous +- 遊び +- ▁hàng +- ▁союз +- 都不会 +- 見られます +- ▁ernsthaft +- ▁Tatsächlich +- ▁мозг +- しん +- beho +- horse +- ▁breach +- ファイ +- ▁když +- ▁muscle +- ▁comparable +- ▁grasped +- 誕生 +- ▁Nta +- ▁zyn +- ▁Они +- 有几个 +- 碑 +- Ю +- ▁Firefox +- ▁carolina +- 控え +- ່າ +- 布莱 +- gruppen +- 澤 +- ▁круп +- 働く +- نق +- 볼 +- ▁yake +- ▁tiek +- 冠军 +- পে +- baho +- ▁Streit +- 晋 +- ▁Brü +- ▁направлен +- 维尔 +- fär +- presa +- についても +- ▁tõ +- ▁administrative +- ▁communicated +- ubah +- реп +- க்கிறது +- 老大 +- 選ばれ +- ▁благодар +- ▁desperately +- corre +- šķ +- ▁sweeping +- 怎么可能 +- ggie +- ▁recur +- ▁உட +- ▁floated +- 推出 +- прэ +- 在上面 +- 織 +- ◆ +- ▁twelfth +- 腺 +- 女孩子 +- ▁counting +- ▁hurrying +- 如果没有 +- ▁Abschluss +- ▁stealing +- ▁Deck +- 稲 +- ▁Bevor +- कि +- みてください +- ▁کمی +- ▁Verkehr +- ▁Martí +- ▁assess +- 逮捕された +- ▁حرف +- gekomen +- 天才 +- 类似的 +- ▁Online +- ď +- ▁እንደ +- 订阅 +- 渋谷 +- ▁už +- サイズ +- 支配 +- чку +- ▁advertise +- ▁wanjye +- urile +- 丧 +- ▁aussieht +- ▁общественн +- ▁Like +- ▁kirk +- ▁tennis +- ▁журналист +- ▁сторону +- ▁دنیا +- 온 +- လ +- ▁главное +- ▁fiz +- funktion +- lå +- ▁fotografi +- шат +- 対策を +- かれて +- 缠 +- ወ +- ▁вполне +- ▁Flugzeug +- ▁trường +- grond +- ▁echoed +- ▁phát +- だいぶ +- ▁Sara +- ▁conversa +- ▁stimme +- ▁Quina +- 指数 +- 祖父 +- wahl +- ▁klug +- righteous +- рап +- ▁mujeres +- 的痛苦 +- 六年 +- 全都 +- ▁clasp +- 浏览 +- топ +- ▁horizontal +- 坚定 +- ▁wengi +- ▁bored +- 局长 +- ▁travaille +- த்திற்கு +- fluss +- 岗 +- 現状 +- ▁但是当 +- ▁Operation +- فور +- ▁junto +- 你到底 +- ▁convenience +- 抑郁 +- ▁survived +- ▁образова +- 公寓 +- लि +- ▁mesure +- ització +- パラ +- ▁mourning +- ▁dart +- ▁Grün +- ސ +- ਿ +- ▁wednesday +- 外出 +- ▁hasty +- 完整的 +- ▁compre +- வரை +- 议会 +- ఏ +- 會 +- ▁farklı +- kracht +- 你可以在 +- カップ +- 无数 +- ▁Gute +- гийг +- ▁llarg +- 
▁elevated +- ▁elsie +- ▁Kommunikation +- あたり +- impine +- 特殊的 +- ழி +- тере +- ▁そのため +- нием +- ▁armor +- ▁Weiter +- 在中国 +- ško +- É +- යට +- ▁thyself +- 沸 +- ▁achtzehn +- ▁extinct +- 太多了 +- ▁dresses +- ▁Prime +- ▁carrera +- ுகிறது +- 不认识 +- ▁expenditure +- ▁innumerable +- 気象庁 +- 公安 +- grown +- 不开 +- ▁infantry +- ▁abide +- に合わせて +- '39' +- 疲れ +- ▁schuld +- スキー +- ▁jog +- وضع +- ▁européen +- ▁seria +- 合格 +- いら +- ာ +- 伐 +- ▁உம்ம் +- రా +- 无关 +- ▁diffus +- দিন +- 的语言 +- ▁Sarah +- 书中 +- storm +- 受害者 +- 仕組み +- ▁banquet +- 早餐 +- кос +- 亿元 +- 10% +- 供給 +- アイデア +- технолог +- ▁diwujudke +- ่ง +- ▁energetic +- ▁bunny +- 从来不 +- burgh +- ிருக்கிற +- ▁glittering +- 嘘 +- 8% +- 留言 +- 二十五 +- CD +- さえ +- ▁geändert +- ▁увидел +- ▁roedd +- 你这么 +- BA +- ▁spray +- ことができます +- 有能力 +- alina +- ▁disposal +- ▁système +- ▁dreary +- ▁quarante +- ▁بأن +- ▁vient +- 第三个 +- ▁vacation +- ▁Ohne +- 币 +- 倡 +- ▁unnatural +- 결 +- 少なくとも +- ▁Another +- みんなが +- ▁fuss +- ▁خواهد +- ▁patro +- ▁personage +- ▁жыв +- ▁ddim +- альным +- ▁scheinen +- ▁probabil +- ▁Bewusstsein +- ▁Militär +- 裙 +- ▁কথা +- ▁Sagte +- ▁whe +- pola +- ▁missus +- 我觉得我 +- ▁quaint +- viel +- ▁telescope +- 涵 +- ▁тупик +- 在某种程度上 +- 言うと +- 传递 +- 的灵魂 +- दा +- ▁töten +- 事务 +- ▁demonstrated +- 教训 +- maro +- ▁figura +- しろ +- 痕 +- ▁предлага +- ▁metrics +- ண்டி +- ▁Iyo +- を前に +- 4% +- ▁marian +- hagen +- ▁ради +- цеп +- ▁слушай +- ▁демократ +- ▁ситуация +- ▁Drogen +- ▁dose +- افت +- ▁اول +- ▁suppress +- bril +- ▁beschlossen +- 涂 +- 謎 +- ಪ +- badde +- ▁있는데 +- ▁drunken +- ▁seule +- ▁afge +- 囲 +- 爸妈 +- 棚 +- 峡 +- ▁bacteria +- ▁erscheinen +- が続き +- ț +- قار +- хоз +- 紧急 +- 好朋友 +- рада +- ▁examining +- 同性恋 +- 韩国 +- 很高 +- ▁спец +- ヘル +- ере +- 一歩 +- batu +- ▁automobile +- 執 +- を含む +- ▁gerecht +- ▁welcomed +- ▁agricultural +- っていうのを +- ▁tudo +- 相信我 +- 喜欢你 +- ▁woorden +- 彻 +- তু +- ارات +- 这都是 +- 模糊 +- 逆転 +- ダブル +- ▁мае +- ショー +- ▁dirigi +- 检测 +- 哪怕 +- 我父亲 +- ▁eric +- із +- ▁parc +- dodd +- 饰 +- រ +- ▁December +- 겠지 +- ▁yali +- ▁gordon +- ▁Stamm +- बा +- 
▁ikintu +- مین +- ச்சு +- 美好 +- ▁interrog +- Aplausos +- EM +- 警報 +- ▁Treffen +- ▁Tau +- ▁rive +- を食べ +- ▁sử +- ▁останови +- ▁oliver +- onian +- ▁Ez +- stuk +- 逛 +- ▁внутри +- ▁decisive +- ▁unglücklich +- の様子 +- 広がる +- 这个项目 +- ▁Klima +- ▁primero +- jang +- ▁Betrieb +- ▁sealed +- ▁insane +- ۔ +- ▁gewoon +- 为您 +- ▁tenderly +- ▁самый +- ▁oppose +- ▁ответил +- eremoni +- саб +- ▁correctly +- FIL +- าน +- ▁어디 +- ▁realidad +- behörde +- gerufen +- уха +- 房子里 +- ▁sujet +- ▁classroom +- ▁hübsch +- ▁sekolah +- ▁گروه +- ▁Gli +- 足以 +- ▁Carolina +- yorum +- ▁epidemi +- áz +- liwa +- ボン +- ▁feminine +- ▁które +- ▁primo +- ▁verdi +- кот +- ▁graham +- mele +- 你们两个 +- िक +- 感受到 +- 无人 +- 될 +- ▁knelt +- ▁tìm +- ▁hebrew +- ▁Meter +- กล +- ▁clothed +- 使える +- 陽 +- amazi +- Kanal +- ▁бага +- 心灵 +- 逸 +- ▁klingt +- ▁видео +- лоб +- 撤退 +- 景色 +- nect +- ▁vorsichtig +- ಗ +- ▁bachelor +- ▁хочешь +- ▁شرکت +- 見ていきましょう +- もあって +- 6% +- ▁Mah +- ▁Су +- லோ +- வேண்டும் +- 浓 +- ▁carpenter +- ▁shrub +- ▁nourish +- 始める +- ようになって +- しまして +- ▁awareness +- 闯 +- ▁невозможно +- ▁나오 +- boro +- ▁strife +- ▁dismiss +- 综合 +- িয়ে +- witz +- حدث +- pür +- 图片 +- 놀 +- 鍵 +- ▁subdued +- spoonful +- ▁Finally +- 看不到 +- 运动员 +- کات +- ▁rector +- 的角度 +- ▁faculties +- 麺 +- 円安 +- 緊急事態宣言 +- ▁molecule +- ▁plough +- ▁вчера +- ▁tiến +- Pierre +- مند +- 市では +- ▁Ми +- bika +- ▁obstinate +- ▁saviour +- 庞 +- pflicht +- 大海 +- ▁habitat +- 这是我的 +- urira +- 識 +- ですとか +- hani +- ▁candid +- 屁 +- ▁ممکن +- ▁এবং +- ▁ລະ +- tsinda +- شته +- 农业 +- 不同意 +- ▁Februar +- లో +- ிலிருந்து +- 太子 +- kleri +- ▁яму +- ▁draught +- 前提 +- ▁envia +- ▁também +- ▁crust +- ▁Калі +- ▁Verfahren +- ▁manifestation +- ▁verbringen +- ▁честно +- 做饭 +- 今天晚上 +- ▁видите +- kò +- ▁marshal +- ▁Bäume +- ▁astonishing +- ▁conjecture +- ▁embarrassment +- 畅 +- 眼泪 +- koli +- альные +- ▁concentrated +- 岩石 +- ười +- ▁amiable +- ▁hiyo +- ধ্য +- 自行车 +- ▁societies +- 舰 +- ▁አዎ +- ▁realise +- ▁pouvoir +- mico +- に移 +- 就行了 +- 合适的 +- ▁setanta +- fenster +- ▁dismal +- ▁Wunsch +- ▁wolves 
+- lil +- 建設 +- ▁rwose +- annu +- ▁aircraft +- イチ +- 拥抱 +- 電車 +- ▁sixteenth +- ▁Kurz +- Episode +- ▁فیلم +- ību +- 並んで +- 三点 +- cò +- ▁yella +- знача +- ▁حا +- この辺り +- альных +- いくと +- ▁Album +- ▁radiant +- ▁nossa +- 层面 +- хим +- ▁diversaj +- ▁imitate +- 谋杀 +- ▁resentment +- ▁shark +- 总结 +- 進化 +- 运作 +- ▁assertion +- ູ +- ▁Praxis +- にあります +- ▁confidential +- uğu +- wego +- ▁Ро +- ▁embraced +- nutri +- 与其 +- ▁buzz +- ▁camino +- يط +- 视觉 +- achtet +- 良い +- ▁скорее +- 珀 +- っていうふうに +- 继承 +- 你刚才 +- ▁صورت +- ▁países +- имся +- 是一样的 +- тэн +- ▁гэтай +- ▁находится +- ▁voilà +- ▁startling +- 我想要 +- 维持 +- ▁tray +- රු +- ▁повтор +- 的主题 +- contin +- 的人来说 +- ▁demanda +- 旧統一教会 +- ▁inspection +- 先ほどの +- ▁Senat +- ▁breadth +- sehn +- ального +- 明明 +- ▁respira +- جنوب +- 落在 +- 耻 +- 試験 +- ▁explaining +- 确 +- ▁perdu +- ▁gorgeous +- 陌生人 +- ▁Absicht +- ولد +- 遇见 +- шан +- 高級 +- ▁parlor +- хлоп +- ษ +- ▁Dijous +- ▁schlafen +- 改造 +- ▁miriam +- ▁Té +- ▁shipping +- そこは +- äck +- ステージ +- 十分钟 +- 太郎 +- ▁pasture +- ワード +- ▁nicely +- 増えて +- ▁beobachten +- 网上 +- ▁fröhlich +- ▁އަ +- தொடர் +- モー +- льны +- ▁стоял +- дзень +- ▁газет +- 名为 +- を認め +- 但这是 +- 决心 +- origine +- 느 +- Gelach +- foje +- ▁charter +- 这些问题 +- ▁vinden +- ▁Nieder +- yumba +- பர +- が出た +- ▁everlasting +- ▁trouver +- zí +- 成年人 +- ▁keer +- ▁Anwalt +- kaka +- ▁nosso +- ▁asserted +- овали +- AD +- 接到 +- କ +- ĥ +- 疫苗 +- 这一切都 +- 理性 +- ▁polished +- ▁flavor +- смотрел +- 産業 +- 你爸 +- ▁பய +- 逗 +- ▁Unión +- ▁rumor +- そうですか +- 我就不 +- ▁attributed +- ツー +- kları +- ▁passenger +- ▁bliss +- 喽 +- ሁ +- ▁verheiratet +- ▁mindestens +- ▁پنج +- ▁highness +- verfahren +- تَ +- 利润 +- ▁funkci +- ▁readiness +- ▁escala +- ▁канал +- ▁система +- ராக +- 抛 +- ▁Demokratie +- いただきたいと思います +- arbre +- 在此 +- ▁vind +- 我们有一个 +- ▁واحد +- ▁gosh +- 拨 +- ▁depressed +- ṛṛ +- ▁unlock +- の前で +- 俺が +- ▁거의 +- 小麦 +- ▁whereby +- ▁tint +- mizi +- ▁stretching +- 趋势 +- 名古屋 +- ภ +- ấu +- ▁putea +- 试验 +- ▁clergyman +- 劣 +- équipe +- খে +- 決まり +- ший +- ▁motiva +- ▁Atem +- ▁ئەم +- 遮 
+- 천 +- 煙 +- ▁Bildschirm +- パパ +- 回到了 +- ▁await +- ▁voted +- 构成 +- 滅 +- ▁Hari +- ▁righteousness +- ▁женщина +- 白天 +- ▁gerald +- 旋转 +- 我想说 +- ▁surf +- 酱 +- ▁Tief +- দ্ +- ▁உள்ளது +- 尸 +- ứng +- boek +- brook +- ▁Erwachsene +- 激励 +- щик +- ▁acceptable +- ▁món +- ▁enthusiastic +- ▁upside +- ่ว +- umuryango +- 収穫 +- 欺负 +- ▁engaging +- ▁Reform +- ▁chop +- үнд +- 的需求 +- 很简单 +- loch +- ▁EBITDA +- ނު +- 龄 +- ▁Spring +- ▁logo +- ▁engel +- ▁fry +- 私たちが +- ▁stammt +- 焦点 +- ▁Mexico +- ▁kerja +- 都内の +- For +- 大厅 +- ▁noranta +- einheit +- ちゃんは +- 我们开始 +- ▁Option +- ▁사람이 +- 足够的 +- んだね +- SA +- ई +- ▁gewöhnlich +- 炒め +- ▁squeeze +- 技巧 +- ▁tyrant +- ▁کمک +- ▁animated +- ▁coverage +- 獲得 +- ▁кап +- ▁Schrift +- ауы +- 見えない +- ▁그치 +- カフェ +- が流れ +- 亲密 +- ▁purity +- 等一下 +- 就好了 +- ▁foremost +- ▁niedrige +- కు +- halen +- ட்டை +- zünd +- ▁olur +- ▁eliminate +- pez +- ▁xem +- 重视 +- ▁konzentriert +- ▁extinguish +- ▁cursed +- ▁louder +- ▁disdain +- strich +- の中から +- フライ +- Krista +- ▁الأول +- ▁plantation +- ▁eindeutig +- ▁einzigartig +- 弦 +- 訪れた +- рю +- 盒 +- ▁hauptsächlich +- ▁Тэрээр +- 保障 +- ▁সং +- ▁Norden +- ▁stamped +- ехать +- verkehr +- wszy +- ▁pleaded +- 忌 +- 熱中症 +- empi +- арга +- 一分钟 +- ▁kadın +- адзе +- 我妈妈 +- živ +- 袭 +- ▁меч +- ▁квартал +- ▁Garten +- ▁frantic +- ▁Hij +- ▁iraq +- がついて +- ▁ئې +- 凌 +- ▁squirrel +- ▁denomina +- ▁imposed +- цэн +- 他们已经 +- ▁Theater +- ▁yiwen +- ▁Pap +- 央 +- ▁tüm +- ▁depended +- 亦 +- ▁gerçek +- ▁berühmte +- hize +- ▁Veränderungen +- ▁unworthy +- feind +- дох +- Instagram +- ▁Benutzer +- ▁dernière +- ▁Biz +- ▁زیر +- ießen +- ▁attempting +- ▁процентов +- が見つか +- 接受了 +- ▁связи +- ▁слышал +- 你有什么 +- யூ +- ცი +- نے +- ▁nearby +- 踩 +- ▁nhw +- ▁درست +- شعر +- ▁waiter +- 容器 +- серьезн +- 前後 +- schijn +- ӧр +- environ +- ▁tiuj +- 壳 +- ▁افراد +- ▁Muster +- 会发生 +- ▁아무 +- ली +- से +- はどんな +- eeuw +- сидел +- 娃 +- 睁 +- sammlung +- குறி +- ▁shit +- пыт +- 营养 +- просил +- रो +- chè +- ▁complained +- ▁Szene +- оф +- ▁imitation +- цам +- runner +- 保罗 +- ▁بدون +- ▁Mol +- 
痛み +- iĝo +- ▁tackle +- скіх +- ▁mehreren +- vance +- ▁öğren +- 本质上 +- ボード +- 隔离 +- ▁encouragement +- 政党 +- ▁үйл +- 文本 +- konna +- weiß +- 辜 +- 担忧 +- third +- อน +- бач +- ▁devote +- おいしく +- ▁January +- ▁consumed +- ▁pyramid +- ▁verursacht +- ▁banking +- コロナ禍で +- 決まって +- ▁realised +- ▁Syn +- 向こう +- 轩 +- ▁международн +- ▁orchestra +- 账户 +- ▁أيضا +- ▁привет +- ▁digo +- ▁мере +- ▁reaches +- ▁nhiên +- ▁aktuelle +- stow +- あそこ +- 况 +- 紀 +- ǧǧ +- ▁retten +- ▁stability +- ▁mož +- ▁يُ +- ▁waking +- ▁Cada +- 됐 +- 车站 +- 你想要 +- 並み +- ەڵ +- ▁collaboration +- 歳で +- 더라 +- ਤ +- ▁skeleton +- ▁Church +- ▁duration +- clav +- ▁другое +- ▁frère +- ▁Institute +- ▁quina +- இன் +- rijk +- 最新的歌 +- 我刚刚 +- актив +- ▁phần +- ▁wandte +- ▁Power +- ▁région +- ▁வந்து +- ▁cierto +- 基本上是 +- ▁Back +- が生まれ +- ▁whispering +- ▁страх +- ▁copies +- ▁rouge +- ▁убийств +- ▁eloquent +- ▁Hong +- ▁kuva +- ющим +- Net +- 晚了 +- ▁nostra +- 我们一起 +- 然后他们 +- движ +- 的反应 +- 这样的话 +- 考え方 +- 舒适 +- あるんですけど +- லில் +- 意味着什么 +- locat +- ▁boiled +- 不明 +- ▁fashionable +- ▁proprietor +- 複数の +- 框架 +- ▁beantworten +- ▁segundo +- 導入 +- альной +- おなか +- ▁cinquante +- 札 +- ▁guinea +- ▁gabriel +- 的作用 +- 買って +- わかる +- ▁geschah +- aquell +- ▁fino +- درا +- সু +- ▁Händen +- kamera +- ▁expertise +- ▁scenario +- ▁حيث +- 次々と +- 連携 +- 打って +- 我心里 +- 嫂 +- を出して +- 诈 +- 太棒了 +- を目指し +- 扩展 +- ▁concentration +- 修复 +- ▁boca +- fay +- 机制 +- లే +- ▁дверь +- 底下 +- 侦 +- 栗 +- ▁Interview +- ▁agitated +- 沃尔 +- みる +- ▁curios +- āka +- ▁solely +- хгүй +- 발 +- ▁Gedicht +- ▁klicken +- 到处 +- 不管你 +- 提示 +- 覚え +- zah +- ▁ayrı +- 捡 +- ▁warfare +- ▁colonial +- ▁аан +- ▁superintendent +- ▁放一首 +- 一顿 +- 这些都是 +- ▁répond +- 小学生 +- でもいい +- 가지구 +- ▁lawrence +- ایت +- legung +- sexual +- 詰ま +- アリ +- 敦 +- 验 +- மைய +- してしまう +- 谨慎 +- ▁difficile +- ▁конкрет +- ▁describing +- jährigen +- ▁bastante +- 诚实 +- ▁exercised +- furi +- ▁facile +- ี่ +- ▁celebrate +- Risate +- 金曜日 +- ▁Mission +- ▁спас +- umunsi +- 对他的 +- zelve +- 星星 +- ▁erano +- ▁Osten +- ▁gael +- 慧 +- ▁ප්‍ර +- 朋友圈 
+- ▁tusschen +- yita +- 今度 +- 周年 +- 聚会 +- дні +- 漠 +- 耗 +- 毫无疑问 +- 轰 +- ▁скажите +- ▁sandwich +- ▁chambre +- ▁traveled +- ▁decidedly +- ▁حالا +- ▁despise +- nesi +- ŝa +- ▁Schauen +- 水果 +- ▁herald +- 婚礼 +- ▁overwhelmed +- ▁heathen +- 一堆 +- netz +- ▁mississippi +- 土曜日 +- ▁internacional +- あすの +- hydr +- 跳舞 +- ▁enthält +- ▁reel +- 按钮 +- 有效的 +- mayı +- ▁resigned +- ▁quantities +- ▁submission +- ваецца +- ▁stärker +- 倒れ +- estructura +- ▁Gegensatz +- ▁llibre +- 走过 +- ▁reminder +- وں +- щение +- Wow +- ▁içinde +- 还想 +- ▁contrived +- ▁ignore +- 我也不 +- 져 +- ▁gregory +- ▁hejuru +- ▁korea +- ▁realiza +- 娘娘 +- ▁cooked +- ▁Stimmen +- ▁satisf +- 脆弱 +- ▁vaguely +- 顶部 +- 误会 +- 無料 +- 受到了 +- 的时候呢 +- 把手 +- ▁subsidi +- ▁necesita +- 假装 +- ▁persecu +- ▁dintre +- ▁endured +- lender +- étais +- ▁litter +- 愧 +- なのです +- 我们发现 +- ▁Tamen +- ▁hopeful +- check +- 声称 +- 意义上 +- 僕ら +- 各个 +- ▁vertrauen +- පා +- ▁khu +- ▁pathetic +- 不清楚 +- 側は +- 一个很好的 +- 六月 +- kker +- ▁hả +- ▁harness +- ▁vanish +- geblieben +- 塚 +- ▁orchard +- 争议 +- තර +- 的核心 +- ▁bland +- ▁heftig +- ▁terrorism +- 并非 +- நிலை +- 中午 +- 衆 +- ▁arnold +- 貨 +- ▁gehabt +- 7% +- فكر +- 演唱的歌 +- 分布 +- 戦闘 +- ▁spake +- ▁negativ +- ▁kimwe +- 畏 +- 眼前 +- ▁بۇ +- ▁sorgen +- 圣经 +- 只要你 +- ▁flowed +- 当成 +- ▁önemli +- ▁zurückkehren +- 砲 +- ▁Soldaten +- はっきり +- energia +- 言われ +- が大きく +- ▁plein +- éré +- 彼女の +- ▁kasuta +- 战士 +- ▁repeating +- invest +- ▁virtuous +- ▁laboratory +- ネタ +- руп +- ად +- 贼 +- 殴 +- ▁буй +- ▁இர +- Video +- ىلى +- fydd +- 几乎没有 +- 山田 +- гуль +- ▁Pel +- 任命 +- ▁discontent +- ▁ຊິ +- ▁winner +- 可爱的 +- ▁verbreitet +- 星球 +- ▁ааа +- 耐心 +- ▁overtake +- 災 +- ▁подумал +- ▁hände +- 打扰 +- வெளி +- ▁wonderfully +- ▁staatliche +- 的房间 +- に来た +- ▁nursing +- ▁dunia +- 笼 +- ▁chưa +- ▁Verlust +- Domin +- kommst +- монт +- 逃跑 +- 估 +- 你还是 +- ▁stepping +- 驱动 +- ▁сначала +- しばらく +- 衝撃 +- ▁blake +- ▁Frankreich +- ▁exploration +- と思うんですが +- ▁clinic +- 違って +- 寒さ +- ▁какую +- ▁soort +- meid +- ▁slack +- ▁күр +- ց +- ▁Ozean +- ić +- 退出 +- ▁historically +- 
▁punika +- 画像 +- ▁weighed +- 本地 +- ấp +- 济 +- ▁pouvait +- ヒント +- 레 +- ▁vuitanta +- ▁verantwortlich +- ▁neunzehn +- hampton +- alist +- ▁склад +- 時ごろ +- Comp +- '47' +- ▁Cafodd +- amategeko +- ▁mnie +- ওঁ +- ▁öffnete +- デー +- あなたが +- ▁enorm +- 明治 +- 今大会 +- '`' +- ▁archbishop +- ▁иногда +- ▁прежде +- ▁göz +- ▁entrar +- ▁знать +- числ +- 奇妙 +- ▁kuvuga +- ▁grub +- ▁rubber +- 坐着 +- に関しては +- ▁preaching +- işle +- ▁psychological +- ▁survivor +- ▁Oberfläche +- 这个想法 +- 脖子 +- 观看 +- ▁Verstand +- ▁кож +- ▁нужны +- ▁diff +- integra +- ▁Hinweis +- ▁maakte +- 我希望你 +- 循环 +- ▁Simon +- 中止 +- ▁crucial +- ▁праца +- ▁thông +- дук +- ▁ownership +- ந்தார் +- ▁Paar +- ▁carta +- 大変な +- ▁hayat +- ់ +- ▁ситуации +- 欺骗 +- ▁regime +- まるで +- مُ +- وص +- 一个女人 +- 資金 +- ം +- ▁kompliziert +- ▁Geburt +- 衝突 +- ▁Play +- ▁Ärzte +- ▁snatched +- خنده +- īgi +- vè +- ▁küçük +- ▁navig +- 両親 +- تحدث +- ▁attire +- ▁Pause +- 連れて +- ▁Einsatz +- kî +- ▁Free +- 认识到 +- லு +- 扩 +- 男生 +- 仓 +- ▁Laura +- ▁magnitude +- ацыя +- torial +- ▁tercer +- 岸田 +- テスト +- بى +- état +- ▁máy +- schläge +- 激烈 +- ▁같은데 +- ▁kep +- 这就像 +- পি +- φ +- 肩膀 +- ▁psychology +- ▁wünschen +- ▁wiped +- を求める +- ظهر +- ▁streak +- ▁Aktion +- لې +- ▁vấn +- ▁owed +- ▁assessment +- ▁solomon +- 魔法 +- ▁digging +- ▁Kleidung +- ▁волн +- піса +- 직 +- ښ +- 叠 +- 垫 +- ▁provoke +- ▁disturbance +- 范围内 +- 教学 +- ▁труб +- ▁Deutsch +- ێت +- ▁করা +- ▁Englisch +- fluent +- ▁какого +- 陪你 +- sulta +- ▁applause +- ▁тэгээд +- ას +- ▁mmhm +- ▁rascal +- тәр +- ▁honesty +- ▁Phase +- Risos +- ▁постоянно +- 申請 +- ັກ +- ▁hängt +- ▁affecting +- ▁Veränderung +- ▁опыт +- હ +- ▁اليوم +- ▁друж +- 碰到 +- ▁Bühne +- ▁порядк +- ҭа +- 新聞 +- ▁Stimm +- ▁Süden +- مام +- 演説 +- 世界上最 +- leitung +- 遣 +- 北日本 +- 감 +- 碳 +- ▁collecting +- っぽ +- ができて +- やりたい +- ▁vardı +- ▁reap +- 背中 +- ▁Farb +- 焼け +- ないよ +- 摄 +- 하다 +- 외 +- ▁appealed +- ッキー +- ▁prick +- 上手 +- お届け +- kono +- ▁мэдэ +- 顾问 +- 很清楚 +- 斑 +- ▁интернет +- 刃 +- һын +- ▁نور +- awal +- ▁flashing +- ▁Since +- 大师 +- ▁macro +- เก +- ▁unconsciously 
+- ▁passé +- ▁dreaded +- ძ +- ▁cependant +- 초 +- ▁Там +- ▁бело +- ▁personnel +- 犯行 +- ▁priorities +- шев +- ▁grote +- neuron +- ▁treating +- âme +- ▁Bach +- 有许多 +- සු +- ▁Pic +- ▁Ereignisse +- 感染対策 +- ▁discoveries +- spoiled +- lith +- 喝了 +- ታ +- ▁Geschlecht +- ▁excursion +- 保険 +- ▁fonction +- 場に +- maktadır +- ▁judicial +- 安倍元総理 +- ▁Sobald +- lardan +- кажет +- ディング +- ▁harmless +- 这么一个 +- ▁ministry +- ▁ຫັ້ນ +- ▁cotxe +- 嘉宾 +- を取る +- ىسى +- lomb +- という話 +- ▁siaj +- ambul +- ▁referring +- 小小的 +- ▁utterance +- 角落 +- ▁kehrte +- パリ +- 捐 +- ▁provincial +- 想出 +- ▁benefici +- ▁Aku +- ▁исслед +- ▁Kong +- ıp +- 激情 +- 周期 +- ▁Ruf +- 坐下 +- ▁ewig +- ▁African +- ▁çünkü +- ▁academy +- ▁Maybe +- 无限 +- čči +- ▁connu +- 这孩子 +- ▁banker +- 村上 +- ▁authentic +- 寂 +- 郷 +- ▁kết +- ヴィ +- ▁luncheon +- ▁알아 +- 冷凍 +- ▁resume +- андр +- ▁refined +- clus +- 这样的一个 +- ாளர் +- 鲍 +- ▁nobility +- ▁Ул +- ໍາ +- を重ね +- ▁嗯嗯 +- 談 +- ▁awaited +- فَ +- ▁Aussage +- ღ +- ▁vermute +- 涌 +- హ +- 月曜日 +- ▁tốt +- esque +- ▁جدید +- 選んだ +- ▁Wann +- 身后 +- アイス +- カ月 +- ▁beschreiben +- 诱 +- ▁chwarae +- 附近的 +- inclou +- 什么样 +- 請 +- කු +- ल् +- ▁initially +- 期节目 +- ▁roaring +- ▁Ар +- ▁другим +- ▁oven +- 必须要 +- 20% +- ▁saloon +- ▁врач +- せん +- ▁lewis +- ▁vicious +- ▁Zentrum +- ▁brazil +- 的权利 +- 美味 +- 不许 +- ▁பயன்படுத்த +- ▁вульф +- ▁видимо +- 先発 +- きれいに +- ▁ужас +- ▁obtaining +- ▁Aufnahme +- ▁podría +- ▁sadness +- 拿出来 +- 必要がある +- лыҡ +- 作业 +- 采用 +- ▁vya +- ▁feat +- ▁Although +- なければならない +- 大規模な +- 推进 +- 我只想 +- ▁durum +- 还有其他 +- どうする +- œuvre +- ▁eloquence +- ▁бөгөөд +- 栋 +- 鏡 +- būt +- günstig +- 开会 +- ▁Grab +- ▁அவர்கள் +- ロシアが +- voja +- ▁felicit +- ▁attendance +- 否认 +- ぇ +- ლა +- わずか +- ▁hunted +- ようやく +- 爵士 +- ހަ +- 然而 +- ▁mournful +- 長期 +- wamu +- 期限 +- 询问 +- ▁установ +- ▁rainbow +- 騒 +- ▁diligent +- ▁vergangen +- ▁asylum +- ▁yanjye +- koht +- ▁comfortably +- gerät +- 凶手 +- ▁armour +- pari +- ▁dignified +- ▁cuerpo +- ▁Prinzip +- associa +- つま +- ▁excite +- ▁matches +- ▁brauche +- ▁كې +- ddling +- ▁baggage +- 
▁outstanding +- 秒钟 +- ▁vierzehn +- 克服 +- ▁blazing +- பின் +- 下がって +- ▁руку +- jící +- わたし +- න්නේ +- ▁consegui +- 他们正在 +- ▁qualified +- ▁tradi +- 统一 +- 人が死亡 +- buah +- ▁защит +- ▁donkey +- 新幹線 +- 欲しい +- クル +- ifuza +- miento +- ▁lumber +- jye +- ▁meteor +- ▁gradual +- களும் +- ▁crowned +- quisition +- ショート +- 糊 +- ही +- チャレンジ +- னும் +- ▁एक +- ▁sydney +- ▁Taylor +- 克莱 +- 拿起 +- リーダー +- 兔子 +- 其他地方 +- ▁Graf +- ▁ужо +- ël +- өнгө +- ▁fünfundzwanzig +- ▁estimated +- 一趟 +- ▁quoted +- த்தா +- spruch +- 激しく +- ▁Был +- čil +- トラブル +- пустил +- ▁seventeenth +- ▁thirteenth +- ▁compelling +- ▁seemingly +- ▁نگاه +- 更好地 +- ▁bunk +- のまま +- nev +- 借り +- ▁кре +- krist +- 七月 +- ▁Atom +- spezifisch +- 一直是 +- Orient +- йтесь +- ▁meilleur +- '120' +- ▁habt +- ▁liking +- ▁legislature +- どのような +- 一块儿 +- いくつか +- ▁romp +- ▁trench +- ▁сильн +- 凑 +- 鸣 +- 登録 +- ▁frail +- 获取 +- ▁Main +- いること +- 広く +- プリン +- 親子 +- 段階 +- 豆腐 +- 很长时间 +- ▁Castell +- 真理 +- ▁треб +- ▁endeavoured +- Institut +- 萌 +- Infrastruktur +- 堀 +- 诉讼 +- ▁прекрасно +- ってほしい +- キャン +- 徐々に +- ▁يجب +- ▁vall +- ზ +- eisen +- ▁Gewicht +- ▁العمل +- ▁contemporary +- 戦略 +- ▁wesentlich +- కి +- 予定です +- っけ +- ▁bello +- ▁socialist +- ということなんですね +- ▁cement +- ▁그걸 +- ▁shifting +- ▁Sup +- ▁дене +- ▁governed +- schäm +- ▁jurisdiction +- ▁giorno +- ▁triple +- funk +- ▁sneak +- 院长 +- 食料 +- чныя +- vira +- ▁spoon +- сця +- ▁silenci +- 出售 +- 几十年 +- geordnet +- ▁Philosophie +- ▁congratulate +- リズム +- ▁scheen +- ▁пятый +- ▁tidings +- ▁podem +- 北陸 +- 大姐 +- ▁hệ +- ▁Lind +- accus +- иров +- 另 +- 付いて +- ▁vegades +- ▁কৰি +- ▁inherent +- räumen +- ▁Vorschlag +- ▁Dabei +- ッジ +- ▁Interessen +- 得很好 +- неп +- ▁keel +- 一阵 +- ▁treasury +- ▁sculpture +- ▁vật +- ▁Beach +- できなかった +- ちょっと待って +- 巨人 +- ▁Verein +- 乗せ +- ▁Ple +- 冲动 +- 休み +- syll +- ත්ත +- ോ +- ▁غیر +- ロック +- чки +- 回答说 +- うまみ +- ලි +- thorpe +- 其中一些 +- をつか +- ▁outdoor +- に向けた +- ▁amuse +- К +- ▁bantu +- uwen +- 辅 +- lulu +- নী +- ativa +- 網 +- 詹姆斯 +- 懂得 +- 太空 +- 四处 +- いずれも +- ▁Vertrag +- ▁verbinden +- 
脊 +- ఎ +- ▁обычно +- ▁dispatch +- 切った +- と指摘 +- ▁komplett +- 火事 +- ▁migration +- kintu +- 働 +- 札幌 +- ▁касается +- 告诉他们 +- ▁ingenious +- だとか +- عادة +- 金钱 +- ضع +- 般的 +- ▁fling +- 正如你 +- ▁durchführen +- コーヒー +- スケート +- 的意思是 +- 很多事情 +- ▁crisp +- ▁grunt +- ▁kutoka +- ES +- 爵 +- ▁dự +- ▁catastrophe +- ▁tưởng +- ▁disregard +- ▁gezegd +- ▁Kredit +- 強さ +- ▁بودم +- OS +- beruf +- ▁handling +- ▁pesca +- 根本不 +- ▁Champ +- ▁девят +- 晒 +- ▁evaluate +- ▁Dewi +- ▁ответствен +- щего +- 认知 +- ovna +- ▁paragraph +- 決めて +- ▁roared +- ▁Quindi +- 毫不 +- 름 +- ▁Sitzung +- கிறது +- ▁willow +- 欢迎来到 +- 船上 +- 维护 +- ▁tanta +- ▁Europäischen +- ▁Molt +- শ্ +- tiene +- 種類の +- を超えて +- ▁terminal +- ▁consulted +- ▁portal +- алды +- ▁المع +- assassinat +- 証明 +- 标记 +- يار +- 一起工作 +- ▁ئەو +- ▁Pflanzen +- ▁embarrassed +- kawa +- ▁serene +- ▁streaming +- ▁jason +- ▁stride +- ▁лиш +- 多次 +- භ +- ▁almighty +- ▁hurricane +- ▁Science +- ▁سره +- fashe +- quadra +- 常常 +- 国民の +- ▁dạ +- põ +- ▁discount +- を出す +- 到目前为止 +- fähigkeit +- 谈判 +- スパイ +- ▁patiently +- ▁lautet +- ▁болох +- 惊人的 +- fleur +- ▁prosperous +- 委屈 +- ică +- ▁Brian +- 两百 +- ▁ভাল +- ▁brandy +- 实在是 +- àng +- ▁produit +- палі +- 客厅 +- を示す +- ▁gezwungen +- исто +- stairs +- ▁lapse +- 镜头 +- 般 +- ▁Està +- operative +- ▁Veranstaltung +- 二人 +- දී +- ▁moll +- ▁courteous +- رَ +- ▁Oberst +- 谁呀 +- 車両 +- анг +- ▁série +- ▁behandeln +- பட்ட +- とかそういう +- лок +- ▁کړ +- ▁Austr +- 看起来很 +- 酒吧 +- ▁বলে +- пок +- インターネット +- 天皇 +- ▁estaban +- зур +- 极端 +- ▁distrust +- بری +- 去哪里 +- ▁сельсовет +- ▁نحن +- ▁erhöht +- ▁mặt +- 軒 +- ▁Abendessen +- ▁accusation +- 领导人 +- льная +- ▁görün +- 年生の +- 伊藤 +- 支払 +- 肌肉 +- ▁zitten +- したうえで +- chimp +- 恋爱 +- ▁ег +- ▁зелен +- 动机 +- ▁Konflikt +- 군 +- ▁sabbath +- '".""' +- ▁kapit +- セント +- ▁polish +- ▁simultaneously +- की +- ▁kugeza +- 相当于 +- ▁годы +- علوم +- итесь +- ▁expressing +- іла +- 美女 +- umugabo +- قطع +- ▁schickte +- ▁реша +- ▁Fälle +- ெய் +- migr +- 己 +- ální +- ▁людям +- ▁некоторые +- バーディー +- fungu +- ▁executing +- بحث +- 
schieben +- 引发 +- 质疑 +- ▁Gründen +- grenzen +- ▁persecution +- ▁Zitat +- 土砂災害 +- ▁besorgt +- ▁unfair +- 到这儿 +- 인데 +- عات +- ▁ребята +- ouvre +- 第二次 +- рыс +- ших +- ▁тус +- しまい +- 思われ +- 鹅 +- ▁Katze +- ▁strengthened +- kamu +- ▁Royal +- ombro +- ▁folgt +- ▁destaca +- ▁همان +- خورد +- 倦 +- スポット +- rechnung +- ▁inheritance +- ىدۇ +- ▁பிற +- ▁Kevin +- ▁behaved +- 数百 +- 明显的 +- roving +- ▁наконец +- 稚 +- 구나 +- 犹太人 +- 确认 +- ▁தீ +- لِ +- 二十年 +- 为何 +- ходил +- жат +- märk +- ұ +- ▁الوقت +- ▁depois +- ▁цар +- ▁despised +- 商人 +- ొ +- ▁besuchen +- 詐欺 +- スタイル +- ▁überzeugt +- ここの +- wili +- ▁Life +- 特别好 +- ▁ziet +- ラム +- を続ける +- rinda +- 호 +- kanya +- ▁chairman +- 木さん +- トイレ +- крыва +- ▁stooped +- ▁Schatten +- ▁kuwi +- 一本书 +- ebene +- вец +- ▁занят +- ▁garment +- ▁Danach +- ▁ăn +- ▁Как +- が続いています +- 遂 +- क्ष +- ▁acceptance +- ▁körperlich +- 義務 +- 该怎么 +- ائي +- ▁krista +- ителя +- ымі +- ▁Neuro +- ▁semble +- を進めて +- 見られる +- 遭受 +- ▁Einheit +- ▁ditch +- ▁Sydney +- ▁üzerinde +- 葛 +- ▁российской +- ▁шестьсот +- ▁advertisement +- ▁lucruri +- 伯特 +- 小林 +- ▁bamu +- հ +- ▁Pakistan +- ▁hành +- わからない +- ▁historische +- kiem +- ▁nghe +- かつ +- 约会 +- もらえる +- tsiooni +- ▁preached +- 울 +- 胳膊 +- ▁exaggerate +- ▁subscription +- ▁sworn +- ▁evolved +- 认可 +- verhalten +- ▁hạ +- нде +- ▁путина +- ▁accumulate +- いくら +- ▁Dž +- 先頭 +- ▁berlin +- 僅か +- ▁Knie +- 数据库 +- ▁жест +- ▁زیادی +- ▁envoy +- ▁самого +- väg +- 录音 +- جعل +- タイトル +- ▁protecting +- brian +- 緩 +- 勘 +- もしかしたら +- ▁shrugged +- willig +- ▁иә +- නය +- 三个月 +- 時代に +- 音频 +- 来看看 +- මි +- ▁vicinity +- ▁choosing +- ▁வழி +- 奇迹 +- ▁Schreiben +- лээ +- croft +- ▁kontrol +- бот +- ▁izmanto +- ▁좋은 +- пуск +- parent +- ▁container +- 疯了 +- 尼斯 +- 確認された +- ▁thicket +- ▁верн +- ▁dankbar +- 并将 +- 大陆 +- ▁comercial +- ▁vorhanden +- jev +- 对面 +- ▁tiefer +- ▁cama +- 不能再 +- だと思う +- 則 +- 鸭 +- ▁soutenir +- 幺 +- ▁Schlacht +- ▁chaos +- ▁meisje +- を行った +- ▁conjunt +- ▁Ansatz +- 皇后 +- 环境中 +- 님 +- 世紀 +- も含めて +- '98' +- ობ +- त्य +- ▁derecho +- ▁davis +- ▁Шу +- お酒 +- 
▁verhindern +- ▁komunumo +- ▁mochte +- こだわり +- 设施 +- 天使 +- ▁schade +- позиц +- 9% +- пай +- ▁richmond +- ▁посмотрел +- eusement +- meester +- ỹ +- োৱা +- 久しぶり +- 候选人 +- ங்களில் +- ন্দ +- ▁Zusammenarbeit +- 少しずつ +- ▁Tausende +- 东方 +- いただき +- aquestes +- ▁gefällt +- гада +- 薇 +- ▁mathematics +- ▁처음 +- lago +- amara +- zentrum +- ▁forgiveness +- ▁Each +- ▁அதன் +- ▁Deci +- 他にも +- ▁mirth +- 建立了 +- Ɣ +- сор +- ▁Wörter +- Bahn +- ▁bajo +- ▁carne +- 可怜 +- thought +- größe +- ガラス +- ▁screaming +- ▁ändert +- 培 +- 给了我 +- અ +- ▁thôi +- ▁indicator +- ▁alfred +- ▁کنی +- ▁gezien +- جلس +- ▁disciplined +- ▁housekeeper +- 体操 +- 还挺 +- ▁submarine +- いきなり +- كرة +- ▁erneut +- ▁Medizin +- 资助 +- vå +- ▁lucru +- ▁разные +- ▁oars +- ливо +- 可惜 +- ▁Oxford +- Kampagne +- ▁generosity +- ▁무슨 +- ญ +- 縦 +- 偽 +- ível +- パワー +- ▁moviment +- 尽量 +- ▁universitat +- ME +- ễ +- ▁нравится +- ▁понимаешь +- ▁böyle +- 学术 +- こんにちは +- ▁kvar +- 摊 +- 棵 +- ▁hereafter +- Franc +- ▁prussia +- ஷ் +- 天哪 +- ▁Hügel +- 最新の +- ▁Korea +- ▁мөн +- ▁হয়ে +- ▁kaiser +- ático +- ▁mooi +- 晩 +- ึ +- ▁cambio +- holm +- ственно +- ▁implied +- ▁év +- 我个人 +- ▁jenny +- 就是因为 +- Ĉ +- を通じて +- ▁değiş +- ▁borrowed +- 是否有 +- ▁Tout +- ▁festgestellt +- きょうも +- ▁تک +- 短暂 +- ▁franklin +- ▁piled +- 还是要 +- 自主 +- یەک +- ▁contempor +- 狩 +- 불 +- نظام +- 亲戚 +- 活跃 +- 真心 +- 会让你 +- ▁کشور +- ování +- ▁stesso +- бег +- 临时 +- 八点 +- いろいろな +- घ +- 螺 +- க்கை +- ▁consultation +- ▁Wut +- ▁Personal +- ▁quedar +- ▁こうした中 +- ▁Ку +- ▁prolonged +- ▁folgenden +- ▁temporal +- ▁bleef +- ăng +- バター +- ▁Saya +- ▁detected +- ▁Про +- ▁translate +- 信念 +- асць +- ないんです +- ▁کوچک +- 両方 +- ▁contrari +- ▁அத +- ோம் +- お肉 +- ņa +- ▁Einstellung +- したいと思います +- ▁встреча +- wyl +- 侵攻 +- 我刚 +- ۱ +- 房地产 +- ষ্ট +- ▁ஆனால் +- 收藏 +- مثل +- ▁Philip +- ▁reliable +- ▁conspiracy +- ▁indispensable +- 日本海側 +- の歴史 +- ▁কোন +- ▁superiority +- 子弹 +- 的意见 +- ▁conqueror +- 帖 +- 迎え +- ▁одоо +- 優しい +- ическое +- wyth +- ▁одну +- wirkung +- ▁financing +- ▁ascended +- كتب +- 八月 +- ▁zoals +- ヶ +- ▁கட்ட +- 
▁petty +- ▁cricket +- すぎて +- 得起 +- cross +- 加强 +- crypt +- ▁jünger +- ▁ຫວາ +- ▁considér +- ▁Studio +- вд +- 另外一个 +- ▁kennedy +- ▁castell +- าม +- ▁abrupt +- ▁buli +- identifi +- ▁disadvantage +- ▁නො +- ▁arasında +- ража +- ▁хотят +- ▁apron +- ▁damned +- 不在乎 +- ▁horace +- 帮助我们 +- communica +- жин +- 貸 +- たちに +- ▁complement +- ▁والم +- джи +- ▁Rick +- கிறார் +- ▁maximum +- อา +- ▁ҡара +- ▁lebendig +- ▁счита +- 毫 +- ▁mechanic +- ෑ +- ナンバー +- 餐厅 +- 援助 +- ▁khá +- ▁creu +- apport +- ▁continual +- 了多少 +- ところです +- 但我认为 +- ▁Villa +- ▁reagieren +- ▁нічога +- 筒 +- 贫困 +- ▁puerta +- ▁pathway +- 效率 +- 津波 +- ▁Europ +- ▁бесп +- ▁счет +- 对抗 +- 生物学 +- writer +- 認め +- ▁extravagant +- ▁umbrella +- ▁jullie +- ▁distressed +- ▁precisa +- 称为 +- ▁honorable +- ూ +- 伊斯兰 +- 尊敬 +- ▁clinging +- ▁бала +- льных +- pā +- ▁civilized +- 出てきて +- BI +- ▁apparatus +- ▁затем +- にわたって +- 道具 +- ▁Grenze +- ▁велико +- печат +- ▁babi +- ▁blunt +- ▁محل +- 漆 +- ছো +- ▁vegetable +- regierung +- かき +- ▁ocasi +- ▁lacking +- 颤抖 +- ▁thereupon +- 另一方面 +- 最後まで +- düğü +- 七点 +- basha +- bikora +- 共享 +- 存储 +- ▁clark +- 是什么意思 +- ▁schoon +- ▁Nahrung +- ▁Elektro +- ▁yapıyor +- ことば +- kibi +- ▁Tony +- hér +- 粮 +- 起床 +- :“ +- Râsete +- 萧 +- ハウス +- partei +- 分别 +- ▁principalment +- 戴着 +- ▁پرو +- occupa +- 部落 +- ▁favourable +- ▁expose +- 売り上げ +- ▁Marie +- 怪我 +- ▁практически +- ▁별로 +- 偷偷 +- ▁complexity +- eût +- vamo +- ▁automatic +- mysl +- ремен +- dimensional +- прям +- ▁Beweis +- 犠牲 +- нең +- anomena +- строй +- ▁طريق +- の間で +- ▁ethel +- 締め +- 只有一个 +- 分散 +- ▁alright +- プラ +- ▁approaches +- ြ +- 汪 +- овского +- человеческ +- ượ +- 発売 +- ▁quindi +- คน +- ▁diplomat +- ▁mulher +- 人才 +- ▁scold +- 灰色 +- 寸 +- 叙 +- ▁covenant +- ▁Mind +- ▁Four +- 气候 +- ▁kennt +- сер +- ▁pew +- guye +- валася +- ▁instructed +- ▁இல்லை +- 地看着 +- 国葬 +- ▁газар +- 掩 +- 筆 +- 艾伦 +- 飛ば +- ID +- ▁substitu +- tracht +- 名称 +- だと思って +- ▁mientras +- 相手に +- ▁Jason +- appropri +- ▁höre +- 捜 +- ▁தனது +- ▁مشکل +- بند +- 犹太 +- ジョ +- ▁Dienste +- 武装 +- ydı +- ▁இருந்தது 
+- ▁праз +- gemacht +- ▁feder +- 炊 +- 合理的 +- leuchtet +- ▁Bereit +- ▁taylor +- そうと +- ивают +- 惊喜 +- 知道吗 +- ▁constance +- あげる +- ворот +- 台上 +- plau +- 剥 +- 古老的 +- 也知道 +- ▁strategi +- ▁amateur +- ▁mettre +- 日军 +- んでしょうね +- ゥ +- ▁orleans +- 说出来 +- 眼里 +- ▁blunder +- あいつ +- 一个小时 +- ▁moist +- ▁teatr +- 以一种 +- ▁разве +- 欺 +- ▁vernünftig +- 疼痛 +- রের +- ▁Kohle +- géni +- ▁oyun +- ▁healing +- brä +- father +- 王国 +- 伸出 +- 就不能 +- 火山 +- ▁пару +- 最后一次 +- ▁Kö +- 巾 +- abaturage +- ▁defiance +- ▁москвы +- 観光客 +- 够了 +- ▁olw +- ▁açık +- ▁primi +- czas +- ▁المس +- ▁blev +- ▁sauber +- ▁voting +- ▁complicat +- ณ +- ▁través +- ▁optimize +- ▁melodi +- ▁lavoro +- ▁подожд +- ▁войны +- するのが +- ▁diminu +- と呼ばれ +- ▁самых +- ▁bijna +- ▁bildet +- つながり +- 棉 +- روس +- 始终 +- ▁yacht +- ▁packet +- šā +- しているんです +- ▁Wid +- ▁hose +- istisch +- ▁prezent +- ▁missionary +- ▁commonplace +- 駆け +- プロジェクト +- ▁circus +- クラブ +- ▁customary +- ▁exclusively +- 鑑 +- 枠 +- 吵架 +- ▁peine +- 一起来 +- 時まで +- いいね +- ▁mathematical +- 珍しい +- ▁иначе +- ▁depriv +- ▁venice +- ▁sitzt +- 留给 +- ▁Court +- ▁zooals +- ぷ +- ▁versteckt +- ▁stata +- ▁billig +- TA +- shima +- 树林 +- ▁iawn +- ▁plac +- ১ +- ▁memorial +- 在做什么 +- ▁thường +- ▁ladyship +- world +- 危険な +- ▁До +- の中でも +- ▁mostrar +- 昨晚 +- ▁appreciated +- ▁جنگ +- ▁bluff +- 庙 +- ▁emphasize +- ▁renown +- 沟 +- 陸上 +- 一点也不 +- lê +- сия +- 椅 +- œ +- 函 +- ▁admiring +- ▁sacrament +- 财务 +- 节奏 +- 礼貌 +- 广场 +- ▁implore +- ицы +- マリ +- 这个事情 +- いいのか +- があるので +- 年级 +- kiko +- ▁exam +- っていない +- ▁diameter +- ▁Palm +- бә +- 起诉 +- ▁ہو +- 大好きな +- ▁cetera +- ▁पर +- もう少し +- 瘾 +- 涙 +- вания +- ▁overflow +- ▁ожида +- 临床 +- ▁сябе +- männer +- ▁contradiction +- 吊 +- ▁사람들 +- ▁ساعت +- ▁العديد +- ▁никакого +- 的思想 +- ▁obstant +- andika +- ▁legion +- ▁cultiv +- ▁arriba +- ▁przed +- võt +- 行う +- ય +- ▁allerdings +- ogene +- schalten +- demokrat +- ▁traced +- ▁считает +- ▁produc +- 春天 +- ▁burada +- 赶快 +- င် +- ં +- ゼレンスキー大統領 +- ▁случилось +- ▁состав +- Ҡ +- ▁bemerkt +- 原本 +- 現金 +- Gerät +- のようなもの +- енә +- ▁Pur 
+- ▁kreativ +- ▁behauptet +- ▁للم +- ▁новый +- ▁hardware +- свет +- ければ +- 贫 +- 誰が +- ▁marque +- ▁stuart +- を見ると +- ▁Menschheit +- 深深 +- очку +- ব্য +- ▁roam +- ▁kujya +- 二百 +- 行不行 +- 慣れ +- ▁savu +- 原発 +- ▁hakkında +- 规矩 +- ▁stubborn +- ▁полно +- ▁übrigens +- ▁offenbar +- ▁tipus +- ▁strained +- madı +- ドン +- 朝から +- ロボット +- ▁verletzt +- 的说法 +- ண்ட் +- 尤 +- 我听说 +- 救助 +- 体調 +- ▁cooperation +- 做了一个 +- ▁junger +- 一点儿 +- ▁dusty +- 开枪 +- ▁Angebot +- 珊 +- ▁Тэд +- 义务 +- නු +- interest +- 血管 +- ▁trouva +- වැ +- истов +- ▁ҡал +- ģ +- ▁vulnerable +- ▁receipt +- 洗濯 +- تعلم +- 厕所 +- ▁conductor +- ▁schreibt +- ▁Verbrechen +- ▁замечательн +- ▁adviser +- ▁hostess +- 挙 +- ע +- ▁cylinder +- ▁امروز +- ▁treason +- ▁Sever +- ıyla +- ▁Vogel +- ▁wertvoll +- 书记 +- 跃 +- ▁gravel +- ▁preliminary +- ▁bảo +- 証拠 +- ▁solved +- ▁будто +- わよ +- 果然 +- вацца +- ことになりました +- 媒 +- یں +- ▁accuracy +- ▁commodity +- ▁District +- بيع +- ĵ +- ▁implemented +- 三月 +- バレ +- ▁краін +- цией +- 能看到 +- 或其他 +- 嗨 +- അ +- ▁belangrijk +- 舟 +- 포 +- 償 +- ▁komplexe +- ▁basketball +- ▁Sekunden +- ▁noisy +- ▁interruption +- 说完 +- ケア +- illus +- ▁compliance +- ▁اتفاق +- ▁psalm +- ▁electrical +- ენ +- ▁vragen +- ▁shun +- 逮捕されました +- ▁severity +- 之内 +- 么 +- half +- 找出 +- ٍ +- ▁موقع +- ▁Signal +- 我问你 +- ▁pobl +- цяг +- 契 +- 贯 +- ▁făcut +- ▁Đây +- 阴影 +- 南京 +- ▁pouvez +- ▁Spieler +- евой +- kipe +- тап +- 花钱 +- ▁doktor +- ▁вперед +- ▁обязательно +- ▁صحبت +- iyoruz +- 製品 +- ஞ +- 抬起 +- 合意 +- ▁quả +- ▁coch +- ковский +- 儀 +- する方針 +- ▁fringe +- geschrieben +- が起きた +- 価 +- ▁государство +- buye +- ▁внутрен +- 疑いが持たれています +- ▁мама +- угл +- ாவது +- になれ +- ▁salad +- 什么都不 +- ▁ghastly +- 匆忙 +- 忽视 +- ▁universities +- ▁Handlung +- cull +- ▁maggie +- ▁Papa +- ̀ +- 旺 +- ▁zerstört +- ▁vapor +- ▁bafite +- 欲望 +- ▁sicherzustellen +- ▁Voll +- টো +- ▁материал +- ▁gemein +- ▁sorrowful +- 诗歌 +- ibindi +- 保安 +- ▁đấy +- 不管怎样 +- ▁automatically +- まっている +- ムー +- ▁Shu +- 怎么着 +- 苏联 +- ▁Jersey +- ▁произошло +- ▁Bạn +- ▁Viertel +- exclusi +- 售 +- 唯 +- 取代 +- 
▁handeln +- ▁blur +- 相机 +- 种植 +- ▁hark +- 污 +- ▁псих +- ▁ritual +- ▁потеря +- 你放心 +- ▁rejoiced +- طلب +- ▁visage +- ぶつ +- operation +- ▁камен +- ▁conseil +- ▁liable +- 蚊 +- эргэ +- ▁यस +- работал +- üßt +- ランク +- ▁occhi +- ▁Мин +- ▁beendet +- ▁kitten +- ▁зуб +- ▁Kenya +- ▁ikibazo +- ▁أيضًا +- デジタル +- ▁abbey +- 会觉得 +- ично +- ிருக்க +- を通して +- 那不是 +- жыць +- 通行 +- ▁longue +- ▁Heimat +- ▁intrigue +- قدر +- бен +- ▁joven +- bücher +- 山本 +- ▁priorit +- 承受 +- 结束时 +- wezi +- ▁regal +- ▁emit +- ▁анти +- 判決 +- ኝ +- ▁eyebrows +- ▁bicycle +- ▁çıkar +- дина +- みそ +- பர் +- 争取 +- 个性 +- 五分钟 +- ก็ +- ▁смотри +- kontroll +- 밖에 +- ▁exalted +- 消え +- ▁gebeten +- ER +- ▁прибыть +- 弾道ミサイル +- ▁решения +- ▁அவள் +- 火星 +- 怖 +- 预期 +- 衡 +- störung +- усь +- グルメ +- ▁minority +- ▁neighbouring +- ▁Leidenschaft +- ▁oppressed +- 你会看到 +- gène +- ▁tutor +- 陷 +- ▁äußerst +- 嘞 +- ▁començar +- objekt +- ▁proverb +- கம் +- anstalt +- ▁holiness +- ▁шүү +- ▁государства +- ▁мус +- ▁pregunt +- 砍 +- ▁überzeugen +- ▁다른 +- слух +- ▁twig +- зад +- analyse +- そうした +- 战场 +- 网友 +- ლ +- Ḥ +- ▁njegov +- ▁repeatedly +- ▁اولین +- ▁любой +- ▁archer +- ▁اور +- 積み +- étude +- 我总是 +- ▁zweihundert +- ▁komis +- 抚 +- ごとに +- ▁resurrection +- 杠 +- ▁Mehrheit +- ▁squadron +- frica +- 合理 +- 農家 +- sensibil +- ទ +- ▁структур +- ▁бүтэ +- 引き上げ +- ▁Innen +- нікаў +- 加工 +- ▁производ +- 单身 +- 呈现 +- ▁Schnee +- ▁برنامه +- ▁gleaming +- 応え +- ▁creeping +- ▁ligne +- ▁працу +- ▁corri +- เส +- 現れ +- ▁перш +- очки +- ▁genutzt +- 皆様 +- 総裁 +- ▁robinson +- ▁часов +- нена +- ▁kettle +- 诗人 +- شاهد +- äß +- ▁muerte +- ▁fake +- ▁société +- ▁микрорайон +- 밥 +- ブラジル +- ▁desirous +- 一个男人 +- ▁spectrum +- springen +- 孙子 +- ▁सम +- りません +- 不太好 +- stāv +- ▁prediction +- したところ +- ▁coil +- ▁Fast +- ゴン +- コーチ +- ▁proclaimed +- ▁jugador +- ▁توانم +- ஊ +- 体内 +- ▁interference +- 四百 +- が増えて +- 打つ +- wickelt +- ▁Мне +- ▁kvin +- дро +- ▁crab +- ▁Literatur +- ▁shawl +- ▁outfit +- 倾听 +- 不良 +- ▁Chef +- zuhalten +- 普通に +- ▁そんな中 +- ▁groaned +- の方は +- 艇 +- ▁Petr +- ▁Wach 
+- bole +- еньки +- 难受 +- 発信 +- 及其 +- ▁scatter +- ▁drifted +- ▁rejoicing +- ▁друзья +- ▁karakter +- 是一件 +- assemble +- を進める +- でしょうね +- ▁Befehl +- 直前 +- జ +- 亚马逊 +- 社交媒体 +- miseks +- 设计师 +- ▁majoria +- 很可能 +- 也许是 +- испол +- 飘 +- ø +- أصبح +- ▁orthodox +- ▁thieves +- 鹿児島 +- 难以置信 +- 明天早上 +- jū +- национальн +- 热爱 +- ுகிறார் +- ▁piety +- あれは +- 农场 +- AC +- ▁مرا +- ▁eleanor +- ▁prêt +- ▁کتاب +- 大谷選手 +- 行事 +- ▁allusion +- ▁yavuze +- ▁trá +- ▁суу +- ▁gọi +- 雀 +- மோ +- 不然 +- かしら +- 优秀的 +- blé +- リアル +- gewinn +- 导致了 +- 辩 +- 旬 +- リオ +- iddwa +- ブランド +- 内閣 +- ▁وقد +- 册 +- ▁concession +- 選択 +- ▁Cambridge +- 込 +- لەر +- ▁ĉefe +- ▁dragging +- ▁sparkling +- ▁consistently +- 吹き +- ▁пакуль +- holz +- ドア +- 做得很好 +- attend +- ▁benjamin +- ▁shifted +- ▁Spur +- ваюць +- ▁hynny +- ▁elevation +- 軽く +- ▁trình +- ボタン +- ganya +- операци +- ▁رسید +- 又不是 +- ▁frenchman +- 看着我 +- ▁suppressed +- kijk +- ▁perquè +- ▁জন্য +- ▁remarkably +- aĝo +- ▁ernest +- 军官 +- ▁download +- mette +- ▁Microsoft +- 沖 +- 勧 +- archiv +- سرع +- 一支 +- ひとつ +- ▁цаг +- dessus +- 当前 +- 释 +- wendung +- されたのは +- 意志 +- に近い +- 这是什么 +- スペース +- ▁ruling +- كۈ +- genomen +- ▁malheur +- سلام +- ▁выборы +- 県内 +- ▁australische +- ▁untersuchen +- 鎌倉 +- 促进 +- ▁Geschenk +- 诊断 +- ▁jeanne +- waż +- ▁groom +- を受ける +- ▁lettre +- ▁adjacent +- 砖 +- 挟 +- эль +- ▁presque +- 远远 +- 地理 +- 的感受 +- ▁Eric +- читыва +- concili +- ▁basil +- 配信 +- ▁desenvolup +- 桐 +- 縫 +- 跑到 +- 同じように +- ▁freuen +- 諦め +- 雨雲 +- ▁selben +- لج +- 三次 +- 平方 +- ▁vaig +- ▁Social +- カナダ +- ▁москве +- 定位 +- すり +- ▁getrennt +- bbling +- ▁syr +- ▁integrity +- UN +- پە +- エース +- ▁Verbraucher +- 舎 +- ▁caracter +- 見てみますと +- விய +- 听过 +- 谁能 +- 深度 +- 术语 +- と思うので +- 消除 +- 撑 +- ▁şimdi +- ▁savoir +- 代わりに +- حصل +- ▁Medikamente +- やっと +- ຫຼ +- を獲得 +- ▁pitiful +- ▁легко +- ▁besiege +- 有趣的是 +- 集合 +- generation +- ▁publisher +- жым +- ಡ +- 穆斯林 +- ▁declaring +- ビジネス +- ▁elkaar +- ▁visibility +- 争い +- ▁scary +- 慢点 +- ▁leiten +- って言った +- 我也不知道 +- ড়ি +- ▁westward +- ▁repress +- ▁fehlt +- ृ +- 
▁installed +- ▁сожалению +- ▁언니 +- 雇佣 +- ▁repos +- ▁парк +- ▁accuse +- سپ +- みたいな感じ +- 飛行 +- 阿里 +- ▁demonstra +- ▁ridicule +- ▁மிகவும் +- 脑袋 +- ▁Company +- চে +- ▁Senator +- AT +- ▁veranda +- 征服 +- 布里 +- يَ +- 丈 +- ▁சேர் +- 崇拜 +- ivamente +- ▁Water +- ▁glimmer +- していること +- II +- 衛 +- 喜剧 +- 手紙 +- ▁집에 +- ējā +- ▁Block +- ▁väl +- undneunzig +- 詞 +- ▁слов +- ▁Kalifornien +- ει +- haza +- 趣 +- ▁Basis +- ▁Cela +- قۇ +- 动议 +- 是多么 +- やろう +- ▁neighboring +- ▁Hast +- алда +- вание +- どこまで +- ▁lavora +- ▁erstellt +- ▁кеше +- ▁Perspektive +- ▁cualquier +- ▁chemistry +- ліз +- ▁inherited +- もち +- ▁surge +- 消費 +- ώ +- ▁erforderlich +- 須 +- ▁обнаруж +- ▁descending +- avons +- mbri +- ▁televisi +- ▁europäischen +- ▁Está +- ▁rocket +- druž +- ခ +- ▁맨날 +- 従業員 +- ▁среди +- غو +- 穿上 +- ▁phụ +- 任何一个 +- ぱ +- вших +- ▁wizard +- レシピ +- 谁知道 +- ▁batteries +- historie +- ▁laquelle +- ▁svět +- тыя +- 질 +- ▁hợp +- ▁tiếp +- ▁نزدیک +- スイーツ +- ▁vermeiden +- ▁striving +- ▁aufmerksam +- 上课 +- 機械 +- ▁empower +- 決して +- 开门 +- ▁Gerechtigkeit +- ▁힘들 +- 療養 +- ▁Three +- existent +- abhängig +- ▁чём +- ▁peggy +- пресс +- ވެ +- ▁systematic +- ▁než +- ▁simplement +- ▁verwirrt +- せっかく +- されてる +- 赞助 +- 预防 +- ▁två +- 赋予 +- ▁существует +- ▁utility +- ▁Wesen +- soever +- ▁Alors +- 業務 +- 爪 +- ▁beschrieben +- ▁supernatural +- ▁فإن +- ▁analyze +- ▁rezult +- ອນ +- 流动 +- ▁listeners +- ▁futur +- Liber +- 残疾 +- 厌 +- 험 +- PA +- 輸入 +- يص +- ▁automatisch +- 欧米 +- ▁Belgi +- 引导 +- わけですよね +- ▁영화 +- ▁skilful +- ▁называется +- ēl +- دۇ +- sparen +- 歌词 +- ▁Schließlich +- 芒 +- 分かんない +- ப்படுத்த +- ▁Republic +- 公表 +- ীয় +- 几分钟 +- 晚餐 +- 桌子上 +- schlüssel +- скія +- ▁Цяпер +- ▁музыка +- ▁Russland +- ▁найд +- ▁merciful +- 迹象 +- ▁printing +- ▁growled +- ▁речь +- 気分 +- 可能性があります +- ▁plunge +- ▁பெண் +- ▁melhor +- ání +- ▁Fä +- ▁transit +- ▁гэсэн +- ▁Gesamt +- ándose +- artisan +- ▁Clark +- кле +- What +- 一个巨大的 +- ▁Dreh +- raad +- 给出 +- ▁perished +- ▁overthrow +- ▁mantle +- rides +- studie +- 为什么会 +- ▁lingered +- 完整 +- ▁scroll +- 底部 +- 
▁Treppe +- provi +- 前往 +- 福利 +- 钢琴 +- صنع +- ▁подход +- だったので +- 遗产 +- ▁infirm +- පු +- するのか +- ▁Geschwindigkeit +- ▁contemplation +- ▁loneliness +- 芯 +- 토 +- 据说 +- acca +- நா +- ▁koran +- ▁schimb +- ңә +- ▁Тут +- ▁plug +- ▁torre +- decorat +- preservation +- ސް +- ▁трудно +- を巡る +- ائية +- 黒い +- ▁Mario +- ▁handled +- ▁biological +- 场所 +- ▁تأ +- 哲 +- ▁brightness +- យ +- ▁October +- ▁adjoining +- ▁irgendwann +- 虹 +- ▁schafft +- 问一下 +- ▁ethics +- 微妙 +- 総合 +- reißen +- ▁показыва +- ▁sparkle +- ▁faci +- ▁Brasil +- marsch +- ▁entrepreneur +- ucci +- 出会い +- плеч +- 목 +- ▁Schauspieler +- ▁largo +- 评价 +- ▁sniff +- ▁recibi +- こんな感じ +- 村庄 +- губ +- ▁bölge +- ▁چطور +- ▁avenge +- 派遣 +- ▁đồng +- ▁новая +- ▁achtzig +- 齢 +- ▁practised +- 老百姓 +- ▁plaça +- があれば +- 座位 +- 安倍 +- 车辆 +- 良く +- ▁друга +- ▁дежур +- ौ +- 自動 +- ▁وهو +- ▁fué +- legation +- あした +- ▁pip +- 디 +- ▁intimacy +- ▁пункт +- ▁ecosystem +- 网站上 +- ложить +- 凭什么 +- ▁aldı +- ▁wambaye +- フィー +- 発電 +- ▁regent +- になってくる +- 飛行機 +- stunden +- ▁Emma +- 回事 +- 比尔 +- 颠 +- ▁Gouverneur +- ▁delicacy +- ▁обсужда +- 絞 +- ▁removal +- 歩く +- getrieben +- ▁basta +- ▁ясно +- ковская +- ▁sasa +- ベン +- 职位 +- 盈 +- ▁furchtbar +- 致力于 +- 繁荣 +- 整備 +- ▁übernehmen +- ▁අප +- われわれ +- 小型 +- 深夜 +- ▁Manche +- 児童 +- 仕掛け +- ▁একটি +- ▁Bush +- ҡан +- ▁alleged +- 走路 +- を紹介 +- ▁promet +- େ +- ▁دختر +- ホームページ +- 約束 +- யு +- を起こし +- larına +- prinz +- ▁بأ +- 奋 +- 四月 +- 你会发现 +- 福島 +- 墨西哥 +- ▁latitude +- ▁እን +- 我就想 +- ▁horseback +- ரெ +- рская +- っていきます +- 哟 +- ▁psychische +- ▁dainty +- ▁inquiring +- ▁başladı +- ▁خاص +- 하니까 +- 好听的 +- ▁chiama +- ▁knocking +- ▁carelessly +- 达成 +- ▁понят +- ▁precept +- േ +- 债务 +- ▁schlimmer +- 最重要的是 +- 姥 +- 枯 +- 見た目 +- 转身 +- ħ +- 破壊 +- ▁Wichtig +- 典型的 +- ▁lawful +- ▁caravan +- 来找我 +- ▁самым +- rühm +- 凍 +- 描いた +- ޅ +- 新規感染者 +- 依頼 +- 不算 +- ▁forsake +- 密切 +- schieß +- ▁semana +- kuti +- ীর +- ▁geschafft +- ▁président +- ▁socrates +- 頑張り +- ▁malice +- က် +- ▁Million +- ▁revolutionary +- моў +- ▁tavern +- 島さん +- чала +- ▁Sco +- څ +- 
▁Griff +- の様子を +- ▁fantastisch +- ▁максим +- ▁verlangen +- ▁verdict +- キャンプ +- を抱え +- 時間帯 +- ▁너가 +- ื +- ペア +- ▁шоссе +- 男の子 +- ▁Muslim +- 抑 +- ▁Dazu +- моло +- 搁 +- 秩序 +- ▁Schluss +- берег +- ▁რომ +- ▁поднял +- ▁athlete +- 慢慢地 +- pharma +- ▁bobby +- entreprise +- すき +- ▁könne +- ▁realizing +- 交换 +- ▁metaphor +- ▁Investor +- ્ય +- ▁nadie +- たいと思います +- ▁stitch +- ▁dimly +- คร +- 即便 +- 一応 +- ▁pedra +- ▁interface +- ▁قىل +- ància +- 把它放在 +- アーティスト +- ▁wußte +- spitze +- 很喜欢 +- って思って +- 艘 +- კა +- を訴え +- ▁Umugabo +- ▁shattered +- garuka +- 回复 +- saison +- 友人 +- biza +- ▁resign +- ▁renewal +- ছেন +- を止め +- ▁Dach +- 半島 +- ▁removing +- 是什么样子 +- 有人说 +- ビア +- 会話 +- 学位 +- ▁racing +- 哨 +- ▁секрет +- ▁pubblic +- скры +- ▁아직 +- geschnitten +- angwa +- 价值观 +- czą +- 有这样的 +- ウム +- باب +- өс +- ホント +- ▁cynnwys +- ▁restructuring +- 共和国 +- 亚洲 +- ▁metod +- ▁نفر +- ▁thích +- ビール +- zieh +- 業界 +- dringen +- niedrig +- と見られる +- ▁qualche +- 失礼 +- ฟ +- Ž +- ▁зүйл +- ▁measurement +- фарм +- เร +- ਲ +- ▁гораздо +- 鹏 +- ▁ہے +- sabye +- īga +- ходзіць +- öffentlich +- 暑い +- ▁roland +- ▁tariff +- 皆さんも +- ▁我想听 +- న్ +- 練 +- 冤 +- 阿拉伯 +- 幻灯片 +- ▁massacre +- 봤어 +- ▁Beine +- سوف +- ▁kritisch +- ▁frock +- ▁разных +- ▁Mama +- സ +- 拾 +- 録 +- ▁Đó +- ▁Betracht +- 同伴 +- 使命 +- ▁consisting +- бло +- ▁daddy +- ▁matrimoni +- プログラム +- 明智 +- 真诚 +- ▁rotten +- ▁convertir +- ▁смерт +- 墙上 +- 服用 +- appelle +- ▁twain +- ▁Dunkelheit +- ▁Identität +- ▁pharaoh +- ▁structural +- 겨 +- ธ +- سط +- ▁будуць +- 多年来 +- やってみ +- ▁Arthur +- 发行 +- 童年 +- 忘记了 +- ▁whim +- æ +- ▁என்பது +- ▁quivering +- 先制 +- 依靠 +- 那天晚上 +- тычна +- 兔 +- kārt +- stift +- 感染者数 +- ▁алло +- ▁влия +- 嫌疑人 +- ▁olympi +- ▁помню +- ▁توانید +- ▁keenly +- ▁Pflege +- กับ +- ▁около +- 広げ +- bido +- ▁Später +- アナウンサー +- 린 +- ছিলেন +- ટ +- ▁supplier +- ▁geistige +- 解散 +- ▁нашем +- 深く +- わかった +- Direct +- писать +- ▁ўсе +- ▁stimulate +- 六点 +- 稽 +- おすすめ +- 拝 +- әү +- 埃及 +- ▁avea +- ▁quoth +- ▁принял +- simila +- ▁posible +- 추 +- ▁città +- 收获 +- ▁Pflicht +- ▁Sehr +- 
▁constable +- gaciro +- 通道 +- ▁jasper +- 된 +- ۇن +- ▁Avenue +- ▁hurled +- ▁چهار +- ıdır +- ▁пасля +- сцю +- ▁falsehood +- 好消息 +- ▁Golf +- 斯顿 +- ▁boundary +- 恰 +- ৌ +- β +- ▁beberapa +- 銭 +- uɣal +- ▁حو +- ▁stripped +- ałem +- சூ +- ▁Kommentare +- ▁countless +- გი +- 下がり +- għ +- ▁있다 +- 祈 +- ▁obedient +- ▁precedent +- ▁dialect +- ště +- を目指して +- ▁charley +- веж +- に警戒 +- どうなって +- 玄 +- 얘 +- ગ +- ▁Innovation +- ▁venerable +- ▁Schaden +- గా +- ▁deployment +- ▁discharged +- ▁bribe +- ▁choked +- เด +- ницы +- ▁Бер +- ▁shareholder +- ▁irresistible +- 색 +- ▁ertragen +- ▁دانش +- 猜测 +- håll +- ▁skr +- ▁начала +- jú +- حاول +- ិ +- ▁شدند +- してくれた +- ▁kombin +- درس +- ▁cuanto +- ▁fakt +- ▁loaf +- 후 +- 予測 +- 治愈 +- 细菌 +- escence +- ▁Diana +- 辰 +- ▁ermöglichen +- ▁области +- ▁apprehend +- ▁sincerity +- ▁Marine +- ▁conduc +- ▁глаз +- मि +- 字母 +- 午前中 +- 不止 +- ▁atrodas +- ▁встрет +- ▁coneix +- リップ +- europäische +- träger +- 日期 +- ▁splendour +- 準決勝 +- ▁Kauf +- ▁equipped +- 伊朗 +- ▁Verfassung +- ▁racial +- ▁wistful +- يست +- اقتصاد +- ▁begrijp +- ▁überprüfen +- 挣扎 +- ▁вижу +- 聊聊 +- ▁greet +- 躁 +- ק +- 创伤 +- ▁ведаю +- 旅程 +- ▁llegar +- етесь +- ▁mbili +- 寒い +- ▁calor +- ▁conoce +- ▁worte +- undsiebzig +- ▁stumbled +- 剣 +- ▁займа +- 楼梯 +- 市长 +- 低下 +- ▁вспомни +- ▁holmes +- 未知 +- ことになります +- ډ +- 辨 +- ▁contemptuous +- '......' 
+- ▁darted +- zustand +- ▁грани +- ビデオ +- ▁soothe +- 짜 +- 创始人 +- ▁imprisonment +- ▁intensely +- 在乎 +- leɣ +- traction +- ificació +- fellow +- ంది +- foli +- 対決 +- بِ +- 长官 +- 머 +- ▁Ankaŭ +- 纯粹 +- ▁unmittelbar +- ▁Ursache +- овское +- ▁granite +- ▁avem +- 一生中 +- گەن +- анс +- ▁epic +- ▁virtually +- ▁tylko +- を防ぐ +- ▁podia +- ▁snatch +- 替代 +- 費用 +- 购物 +- 組み合わせ +- 長崎 +- ▁لذا +- 더 +- ▁واقعا +- ▁maior +- ▁ieder +- をはじめ +- 点钟 +- ელ +- ▁Kontext +- ▁Verbesserung +- サポート +- geleitet +- ތަ +- ▁wickedness +- ▁kugirango +- 装饰 +- ▁azul +- コロナ禍 +- 集体 +- ▁Null +- Europe +- 幹部 +- ▁Umfrage +- 澄 +- স্থা +- ▁cafe +- 展开 +- пак +- ▁приходит +- 携 +- 教えてくれ +- 晚安 +- 夫妇 +- εί +- 如果不是 +- 谈过 +- ▁controversy +- ▁nyingi +- ▁lần +- まとめて +- につながる +- ようになりました +- ▁beeinflusst +- ▁Italien +- ▁classical +- スリー +- bilidad +- нув +- ピーク +- ▁erleben +- と述べ +- ▁humid +- 海军 +- brennen +- ▁henceforth +- ▁گرفته +- 栄養 +- йшоў +- ▁famine +- 之所以 +- ▁improvis +- жә +- ▁المست +- ▁burial +- ів +- ешься +- 冷たい +- 实话 +- ▁Fou +- ▁przez +- ▁Mathematik +- ▁furnace +- ▁ອື +- 舞蹈 +- ▁Abteilung +- ḥem +- ▁Fair +- ▁avut +- ▁dringend +- ▁Lincoln +- ▁вариант +- ▁bemerkenswert +- 困扰 +- ంద +- ▁fertile +- 另一边 +- ▁sangat +- 基金会 +- 注文 +- між +- ▁Sagen +- 告诉她 +- ಹ +- ▁instinctively +- อย่าง +- 恳求 +- 製造 +- ▁gratify +- ぼく +- ▁grit +- ▁Anderson +- ▁turtle +- ▁unusually +- 赢了 +- 会导致 +- ▁Karl +- ▁Wetter +- gültig +- ▁römische +- 摄影 +- 吃完 +- ▁declara +- '250' +- 团结 +- 每当 +- 知ってる +- 酵 +- ▁Kapital +- 职业生涯 +- 重症化 +- вернуть +- ambaye +- 洪水 +- observa +- ွ +- スペシャル +- ▁equation +- 恭喜 +- ▁инде +- 宪法 +- ▁northwest +- ▁Müll +- ▁oyster +- ▁devons +- 几年前 +- ந்தது +- ▁Verteidigung +- ミー +- ▁Details +- ▁gewann +- 蛋糕 +- ▁Kleid +- つながって +- ▁combina +- 被迫 +- ▁geldi +- ▁confronted +- 僵 +- 季节 +- ▁그건 +- ▁soothing +- ത്ത +- ▁хэрэг +- 牛肉 +- ▁papel +- ▁Meeres +- ▁Fox +- ▁Darüber +- 偏见 +- メール +- お茶 +- 卡尔 +- MA +- Tool +- 扮 +- ▁crise +- ▁efficiencies +- ▁participants +- ▁refusal +- ▁알바 +- ņēm +- ▁여기 +- BM +- école +- ▁upgrade +- ▁superb +- ते +- 言わ +- 
▁черт +- ▁господин +- ▁fireplace +- ▁Campus +- ▁Hollywood +- ▁experiencing +- 震度 +- ▁никого +- ▁системы +- 可靠 +- klima +- 帽 +- 誕生日 +- ▁видим +- ブルー +- 惯 +- ▁biology +- ▁annoyance +- गा +- 回去吧 +- に入れて +- vogel +- ▁современн +- ▁Wolf +- சோ +- 失踪 +- ▁spill +- 埃尔 +- 这让我 +- 大众 +- チュ +- ▁ignored +- 变得更加 +- ▁beforehand +- ై +- ▁anticipation +- ▁imprisoned +- 伴侣 +- トランプ +- ▁ilgili +- ▁பண்ண +- ▁maggior +- ▁hydro +- ▁unexpectedly +- ▁opportun +- ▁jî +- 肢 +- ባ +- 孫 +- ▁entscheidend +- ▁விளையாட +- ▁salud +- 英語 +- ▁смысл +- কো +- ▁fui +- ▁pike +- こんなこと +- 分野 +- 艳 +- ը +- ▁staggered +- ▁League +- னால் +- 不幸的是 +- Datei +- mdash +- ▁cedar +- 部隊 +- おうち +- ▁biraz +- 慰 +- 拥 +- Community +- ▁gouvernement +- 暮らす +- ▁drog +- ▁இசை +- 打印 +- ▁turkish +- 过程当中 +- ▁кел +- М +- 这是关于 +- ▁barber +- ▁kinh +- ▁bezeichnen +- 松本 +- ▁subordinate +- 嘲笑 +- まれた +- 包围 +- 非法 +- 買い物 +- Ɛ +- ▁pequeño +- 忽略 +- 猛烈 +- kundig +- ▁бич +- ▁stockings +- 終わって +- бежал +- 王爷 +- าร +- ▁அல்லது +- ▁moore +- 跟你们 +- ▁인제 +- ▁Kiel +- ▁lúc +- ▁apology +- ロシア側 +- ▁eĉ +- が出ています +- 措 +- 昂 +- ແລ້ວ +- ▁phantom +- ▁població +- 吉尔 +- わかって +- getreten +- ▁exceeding +- ▁Management +- ▁Şimdi +- 虚拟 +- 这段时间 +- ▁communion +- っきり +- 植え +- 这个过程 +- ુ +- お伝えしました +- ▁встреч +- ▁besuchte +- ৰে +- したのが +- が発表され +- 胀 +- ▁remnant +- したのです +- нис +- mıştır +- ▁شدن +- ▁colleague +- 抑制 +- 润 +- ▁президента +- 環 +- 伞 +- ▁tecnologia +- ▁последние +- ▁restoration +- あらゆる +- まいります +- ▁qualcosa +- fleck +- ▁بیمار +- ▁vegetation +- ▁distracted +- ▁hamlet +- თი +- schneid +- satisfied +- నే +- கொள்ள +- bwenge +- ▁எனக்கு +- 玫瑰 +- なければいけない +- だからこそ +- 継続 +- ▁aufgewachsen +- ▁explicit +- ული +- ▁nightmare +- komeje +- 书籍 +- 려고 +- burton +- bär +- ▁chama +- girl +- பிடி +- 深圳 +- ▁Küche +- 实力 +- govor +- 努 +- ▁собственн +- ▁або +- 俄 +- ▁affliction +- ▁chancellor +- ▁suivant +- ▁Beide +- 輸 +- 电池 +- стоян +- ▁babylon +- ▁Ça +- こともある +- ▁kız +- ▁scoundrel +- ▁vorbereitet +- ▁apologize +- 折磨 +- ▁pierced +- ساعد +- ▁protector +- ▁lydia +- ▁connais +- ▁actress +- 患有 
+- ▁tromp +- ▁rejoin +- ▁Kenn +- ▁quién +- 蕾 +- 격 +- わかりました +- を含め +- 反馈 +- ▁grandeur +- ▁maud +- ▁Pfund +- 几周 +- 格雷 +- しません +- ivität +- ▁brace +- ▁trọng +- 루 +- tempo +- گذاری +- ▁পরি +- liegt +- ▁Bang +- 婷 +- ▁Vietnam +- ▁cœur +- ▁doppelt +- へえ +- 言ってる +- ▁już +- 收到了 +- 幽 +- ▁nötig +- ▁четвёртая +- 민 +- ים +- 介護 +- ▁людзі +- گران +- ங் +- 家具 +- 動いて +- ▁isaac +- ▁першы +- সব +- RO +- 坐下来 +- ▁Investition +- ▁verzweifelt +- ▁Maschinen +- ▁솔직히 +- origen +- だけではなく +- ▁خب +- 遭遇 +- ▁crave +- 更快 +- ▁effi +- 大爷 +- 黙 +- ▁Canadian +- ▁aufgeregt +- 绅士 +- pathie +- 布朗 +- ▁devient +- 返回 +- ▁ooit +- 优秀 +- ▁Protest +- ▁predecessor +- 預 +- 티 +- ▁Stärke +- ▁dirige +- ▁sáng +- ることができます +- ▁бывает +- ▁faisait +- يقة +- 所以如果 +- undfünfzig +- 尔顿 +- 彦 +- built +- ้น +- держать +- ▁хамт +- ▁prodig +- යෙන් +- ια +- 椒 +- ▁tyranny +- ▁않아 +- ▁evolve +- ▁proprio +- ▁없는 +- ▁bombard +- ▁Ohio +- ырға +- 역 +- gespräch +- ▁хамгийн +- ▁мистер +- 困難 +- ▁Thu +- ほかにも +- therapie +- ▁revolu +- バイク +- ▁finanzielle +- 辩护 +- ▁scrub +- ▁judging +- ▁freue +- ▁крем +- wash +- 来到这里 +- 逃走 +- ▁última +- ▁انسان +- ▁Lä +- ▁müde +- 加盟 +- ணை +- 西安 +- 土著 +- ▁ministre +- 役割 +- ▁geholfen +- ▁hết +- ▁Madrid +- ▁Stuhl +- 疑問 +- 昨天晚上 +- 我的朋友 +- 跑步 +- ▁баб +- corp +- گشت +- ▁knapp +- 要素 +- Restaurant +- ▁kürzlich +- ▁voluntary +- ▁член +- ▁angst +- ▁ubwa +- ▁wartete +- ▁inhabited +- 分ほど +- 汤姆 +- ▁трав +- と見られ +- 初め +- গ্র +- ตร +- ▁پسر +- ▁woher +- koop +- technolog +- stelling +- 巢 +- ▁Michigan +- ▁hamilton +- 浑 +- iPhone +- ▁gekauft +- ▁아닌 +- ▁девочк +- ▁министр +- озер +- ▁boundaries +- ▁exploring +- シャン +- фар +- ▁repel +- バンド +- ▁volont +- ▁позвони +- ▁employee +- ▁trobar +- ▁paddle +- 黛 +- ▁обраща +- ▁identi +- ▁Einkommen +- ▁radiation +- راض +- 动手 +- ▁chú +- stehenden +- 递 +- ▁mcc +- 收看 +- ▁Clinton +- ▁Vorsitzende +- 运输 +- '900' +- ▁sincerely +- ▁Küste +- matur +- 取る +- 던데 +- ▁specialist +- ケン +- 搬到 +- ▁voet +- zulassen +- ▁ankoraŭ +- ▁grinned +- ▁municipi +- ▁zweimal +- ▁үҙе +- 抗議 +- ▁gorge +- ▁имею +- ▁Weltkrieg 
+- ируют +- ▁Patri +- ▁settlers +- ▁بچه +- 傅 +- قليل +- દ +- ▁Dimarts +- ▁오늘 +- ▁공부 +- ▁Вось +- ▁crawled +- ▁suspend +- ▁daudz +- 申し上げ +- ▁durfte +- ▁brake +- チン +- ぽ +- ▁Master +- ▁certificate +- ▁страшно +- ▁statute +- ▁Kaiser +- ▁Beau +- 有名な +- ҟ +- ẹ +- ▁profi +- ▁popularity +- 饶 +- ▁repetition +- ▁sechzehn +- effizient +- 差距 +- ▁cobert +- 突出 +- 选手 +- ▁bleeding +- ▁рабіць +- فرد +- 做些什么 +- ▁patrol +- 升级 +- 九月 +- ▁однако +- 味わい +- 微信公众号 +- 끼 +- ▁얼마 +- ▁фильм +- ▁drilling +- ström +- ▁মই +- 話し合 +- ▁plateau +- ▁komuni +- 今天早上 +- ▁tumor +- stritten +- 折り +- 当たり前 +- 時刻 +- ▁inevitably +- ▁kontraŭ +- ▁liệu +- 巣 +- 迷惑 +- ▁fascinated +- ючы +- 発展 +- 解答 +- ▁Doktor +- 지만 +- ▁wolle +- ▁gerçekten +- hagi +- brett +- 寨 +- ▁انتخاب +- ▁battalion +- ▁Európ +- 岸田総理大臣 +- ▁средств +- 巴黎 +- 站着 +- 有很大的 +- 越多 +- ▁sigui +- ▁жар +- 认为这是 +- ▁Schön +- ってきます +- ▁natuurlijk +- église +- 贈 +- ރި +- ▁specio +- ▁yankee +- 言われる +- 報じ +- ▁autour +- ▁Estados +- 也是如此 +- сцен +- IM +- ▁Pont +- வர்கள் +- ▁Notiz +- 感知 +- 如果你愿意 +- ỡ +- ▁어떤 +- グリーン +- ▁специальн +- ▁2010 +- ▁delegate +- ▁Depression +- ▁wallace +- 你能不能 +- 神社 +- 赞成 +- ībā +- trekken +- ▁Stone +- ▁cielo +- 僧 +- ▁Haufen +- بەر +- ▁nasty +- ▁placid +- ▁abbot +- ▁имел +- 这么好 +- ▁erfordert +- ▁отец +- ▁bên +- ▁trifling +- ▁angemessen +- 慈善 +- ▁legislative +- كَ +- ṭṭ +- 針 +- ▁Verkauf +- ▁Ле +- 江戸 +- 嶋 +- ථ +- パフォーマンス +- 猩 +- ▁взять +- 尽く +- 辉 +- ▁медведев +- ▁transcend +- ங்களுக்கு +- 局面 +- ▁glove +- ▁приехал +- ▁violation +- 昨年 +- 脇 +- ▁Health +- ক্র +- 企业家 +- ҙар +- 住房 +- lendi +- ▁mound +- ▁gestorben +- ▁ungewöhnlich +- ▁mouvement +- ベー +- 無事 +- 防御 +- ▁elementary +- ▁kneeling +- を広げ +- öffne +- 七个 +- важа +- ▁Pul +- ▁далеко +- 在这一点上 +- ▁reconstruct +- ▁одном +- 废话 +- рина +- ▁opini +- 静岡県 +- ▁стане +- 指标 +- 狐狸 +- ד +- ▁Ariko +- ▁Global +- ▁pretence +- 轨道 +- ▁magnetic +- ▁gravit +- CM +- 楽しく +- ▁столько +- ▁refusing +- きちんと +- 污染 +- ▁demokratische +- ▁принципе +- 布拉 +- ▁Dennoch +- 确切 +- ▁ivory +- ▁Bauern +- ▁Zucker +- やろ +- ▁frente +- 
▁сельск +- ▁petrol +- 影响力 +- 差点 +- 海底 +- antrag +- ▁Bundesstaat +- яўля +- ▁роман +- রো +- ▁probablement +- ▁siebzig +- ▁antonio +- guna +- キック +- ▁strove +- röst +- ないこと +- ▁hiç +- litten +- ▁начинает +- ▁Führer +- ▁introducing +- ▁miglior +- ですもんね +- ▁apollo +- ▁relaxed +- ▁Го +- sinzi +- kunga +- 手臂 +- ▁домой +- ▁glitter +- 老太太 +- ▁dodge +- ▁бюджет +- ▁Fakten +- گار +- activitat +- ▁parecía +- ▁cradle +- ▁дуб +- добав +- خوان +- ▁viņu +- prej +- 模仿 +- ▁bazı +- коммун +- 写道 +- ▁treachery +- ▁vị +- ▁Looking +- espècie +- ▁거기서 +- కా +- ના +- 性质 +- LA +- 毅 +- ▁праблем +- ▁exclaim +- ▁aufhören +- 异常 +- 到最后 +- ▁courtyard +- 勢力 +- ▁prophecy +- ▁recipe +- ▁doomed +- 优雅 +- 迈克尔 +- ▁Ды +- ▁furiously +- ▁sicherstellen +- 尾巴 +- tempered +- 这样的事情 +- ▁thức +- 抱着 +- ۋە +- ▁американск +- пэўн +- ▁hội +- ▁Jordan +- 人工智能 +- ▁trenches +- レーン +- ზე +- ▁bridle +- ▁suspense +- ▁Schriftsteller +- 匹配 +- ▁binding +- プリ +- ▁heutige +- 感動 +- ▁depict +- きれいな +- dolf +- ▁Direktor +- ▁benötigt +- 등 +- ▁missouri +- ▁paradox +- ▁warehouse +- ▁Johann +- forschung +- やったら +- いかに +- 发射 +- ▁compel +- ▁massachusetts +- ▁وهذا +- ▁conosc +- ▁entschlossen +- ▁gaunt +- 仕上げ +- 徴 +- ơi +- ▁дети +- ▁dikontrak +- ▁néixer +- ▁долларов +- 塑造 +- ▁uplift +- لىرى +- 教徒 +- 任何地方 +- ▁камер +- chamber +- ▁marilla +- ▁Stell +- ▁haughty +- ▁sledge +- ▁facilit +- ▁каш +- 百五十 +- 在那儿 +- sicherheit +- 案内 +- 久保 +- ັ້ນ +- වල +- ▁monastery +- ▁Überzeugung +- ▁crooked +- ▁эксперт +- ▁intolerable +- 掃除 +- ▁холод +- 弗雷 +- вшие +- ҟа +- فعال +- 我第一次 +- 大楼 +- kirche +- ព +- ▁ammunition +- ▁applaud +- давал +- ▁medicina +- ▁schooner +- ▁Christi +- ▁orienta +- 一体何 +- っぽい +- 顺便说一句 +- ▁ເດ +- ▁necessita +- 代替 +- 浸水 +- 服从 +- ▁ethical +- 苍 +- 言います +- ▁многих +- ▁وإ +- حاضر +- ▁говорите +- ▁emphatic +- 聞きました +- 困境 +- ▁سے +- 拠点 +- 不记得 +- いらっしゃい +- 有什么事 +- arrell +- 楠 +- ▁survival +- ▁өмнө +- 碰巧 +- lož +- 抜き +- ▁outbreak +- streich +- 任何其他 +- ▁держа +- ▁plaster +- 崔 +- ▁Если +- キャプテン +- 來 +- 皿 +- ▁хотелось +- 步骤 +- ▁черн +- ▁hagati +- 
▁surround +- ▁Twa +- ▁அதை +- ▁Nachmittag +- ▁baptism +- ▁секунд +- ごめんなさい +- 决策 +- ▁reasonably +- 介意 +- ▁eky +- という状況 +- ▁anfing +- 食べ物 +- ▁banyak +- ▁injuries +- ނަ +- 失礼します +- 病例 +- 甘い +- тверд +- ▁Fremde +- ▁الذين +- keneye +- ▁zaidi +- ▁ravine +- ▁accommodate +- 朴 +- ▁biscuit +- 衆議院 +- ▁victorious +- أخذ +- ▁Großteil +- そうなんですね +- ▁augustus +- ▁вызыва +- 初戦 +- 能找到 +- ▁நீங்கள் +- 二零零 +- mówi +- 举起 +- 服务器 +- freiheit +- structure +- 神経 +- 妥 +- 信頼 +- ゴルフ +- 经历过 +- 默默 +- ▁Creek +- ▁aṭas +- ▁Guerra +- 宮崎 +- ▁siguiente +- 兵器 +- ▁replica +- 赔偿 +- ▁hiểu +- 過去最多 +- 臂 +- ▁resol +- ▁panting +- жер +- 时尚 +- मु +- Qué +- 涼 +- ▁illustrious +- ▁indefinite +- 厄 +- ▁bedeckt +- ▁shrine +- 潜水 +- ▁exig +- ▁حتی +- дзіць +- ▁спин +- 竞 +- рист +- と比べて +- 케 +- ▁preocupa +- ▁preĝejo +- ▁vẫn +- ▁behaupten +- яўляецца +- ▁notamment +- 運転手 +- ▁weariness +- ▁rimwe +- 吉田 +- そっか +- ▁flint +- 衷 +- 豹 +- ▁زۆر +- ▁заметил +- gypt +- ▁Milch +- 大人気 +- 很多时候 +- рожд +- ▁второго +- 卸 +- 祝你 +- мель +- कु +- 被告知 +- ▁correspondent +- ▁propaga +- 读到 +- 作战 +- 燃え +- ▁우리가 +- ▁passionately +- ▁För +- ▁хор +- 甚 +- 頂いて +- ք +- ▁bệnh +- ▁offspring +- ▁Ancak +- トレーニング +- この時期 +- 買う +- 因为这是 +- 乗客 +- 强迫 +- 市長 +- ▁researchers +- が行われました +- freude +- ▁гэтыя +- ▁scenery +- ignit +- енько +- 物品 +- 紅 +- ▁Original +- 찍 +- hypno +- ▁режим +- ▁ahubwo +- honneur +- 行星 +- ▁imaginary +- winkel +- өгө +- ▁ваша +- ▁tâm +- ޑ +- ▁Président +- 見てください +- 奖励 +- ▁giống +- ▁حيا +- ▁clatter +- ▁circulation +- 調理 +- ございます +- ▁பாட +- 从哪里 +- 很酷 +- 对我说 +- ▁Urteil +- ▁Entdeckung +- ▁proclamation +- 查询 +- ▁wireless +- なと思いました +- ▁deixar +- ▁거는 +- مجموعة +- rühren +- 协调 +- 活発 +- schuh +- რო +- 른 +- そろそろ +- 支撑 +- ▁아니면 +- 有足够的 +- 品质 +- になりそうです +- لدى +- 裕 +- ▁grammar +- ▁lượng +- ▁преступлени +- 牛奶 +- ▁đường +- ▁만나 +- ▁ricevis +- ▁außerdem +- ▁wholesale +- 列車 +- ▁jupiter +- 和我一起 +- ▁acabar +- 液体 +- ▁있지 +- cyaha +- 碎片 +- ▁crater +- 十月 +- impuls +- したあと +- ▁elektron +- 分ごろ +- ある程度 +- 跟他说 +- ▁titre +- ▁своими +- ▁acuerdo +- නම් +- ▁бүх +- いませんでした 
+- 話す +- 大切に +- 認められ +- ▁хотели +- 放置 +- illard +- Mobil +- 그 +- ニュースをお伝えします +- 监督 +- ētā +- aardig +- ▁discrimination +- 延伸 +- รา +- 流量 +- ▁considerat +- 었는데 +- ▁pronto +- 贷 +- 素材 +- ▁алексей +- ▁caroline +- 屁股 +- 辞职 +- 占据 +- 我不得不 +- ಗಳ +- 很开心 +- eater +- ▁Ahnung +- ▁secular +- 理念 +- 貴重な +- ▁Abschnitt +- ▁hiring +- 寒気 +- ▁vigor +- ▁fick +- ▁decorated +- ดี +- 跟随 +- ▁español +- ▁помочь +- ▁entsprechend +- 인가 +- ächte +- ▁Zehn +- ▁quinze +- 双手 +- ▁đô +- yorsunuz +- 共通 +- ▁tutta +- 仰 +- ▁sentido +- ▁accommodation +- ▁frequency +- 友谊 +- ▁Nigeria +- 邮件 +- ▁публи +- ámos +- 就业 +- सि +- ▁fiddle +- ▁أول +- ▁northward +- 很奇怪 +- 这就是你 +- ▁бүл +- 机关 +- 愁 +- ▁Tränen +- ▁airplane +- சிய +- ▁moralische +- ▁දැ +- ▁luôn +- spetta +- ▁fiend +- 干预 +- ▁potenc +- 勃 +- ޓ +- ▁transparency +- ▁hypothesis +- 守備 +- 作为一名 +- ▁damsel +- 勝手に +- ▁fancies +- ▁bấ +- RE +- ▁cruz +- 不允许 +- お昼 +- ▁запис +- працоў +- ハハハ +- 한데 +- ▁realization +- 随机 +- 分解 +- köz +- ▁характер +- 轴 +- urukundo +- ▁surtout +- 印刷 +- 我从来没有 +- 现金 +- オフ +- ▁chị +- ▁lascia +- tropic +- ▁rwego +- ▁Carol +- ött +- と思っています +- فريق +- 弓 +- ▁recupera +- レストラン +- މު +- 宿泊 +- ▁abolish +- jumu +- ▁قم +- ▁diventa +- ▁chronicle +- 师兄 +- ▁умер +- 研讨会 +- 嘴唇 +- 一首歌 +- որ +- ▁allocation +- にならない +- ▁existiert +- ▁homeward +- gewicht +- 马车 +- ▁beneficial +- ▁Hunderte +- ▁Thor +- ▁различ +- धा +- 写信 +- undsechzig +- ಟ +- 随着时间的推移 +- ▁полностью +- ▁çoğu +- 駄目 +- シャツ +- kogu +- ▁mwana +- てくれました +- прыг +- ▁prolong +- なんですけども +- 起源 +- ▁Matthew +- 限り +- ▁repentance +- ▁hermano +- ▁dinero +- ▁oscar +- исты +- 氛 +- ▁securing +- ▁ukuthi +- ▁derjenige +- ▁Beitrag +- 上午 +- 난 +- ▁gibb +- ▁Evrop +- コントロール +- ▁Records +- 牙齿 +- ▁Ні +- ▁ғой +- ▁jimmie +- ultima +- ▁Earl +- ▁complac +- 相遇 +- 拘 +- ▁verlangt +- ēji +- 玩具 +- 出発 +- 框 +- ▁deceased +- ▁причем +- ▁geöffnet +- ▁melody +- ▁få +- ▁있을 +- ▁مرة +- important +- 投资者 +- ▁southward +- ▁உள்ளன +- 航行 +- 借口 +- ční +- េ +- ▁erheblich +- 視聴者 +- ▁heiraten +- 就是为了 +- ▁neunzig +- 復帰 +- 回顾 +- ▁dagger +- 言いました +- 
▁feverish +- 尖叫 +- ▁Hass +- ▁fearless +- ▁programming +- yonna +- ▁extremity +- ▁avere +- ▁minimal +- гээд +- こない +- 났 +- ▁wondrous +- ▁دیگه +- すごいね +- losigkeit +- продукт +- ▁unaware +- ▁factories +- კი +- 起作用 +- ▁millionaire +- লের +- черед +- 躍 +- 钉 +- ▁varieties +- ▁mauvais +- ▁vairāk +- ▁booth +- ▁dónde +- ۇق +- service +- 最早 +- ▁unkind +- 이나 +- fisch +- ▁adverse +- ▁узнал +- ▁가고 +- ▁athos +- ח +- 乾燥 +- روب +- न्छ +- ▁darkened +- ▁applies +- 亨利 +- ▁приказ +- って言う +- ▁முன் +- વા +- ▁spontaneous +- ▁দিয়ে +- schrecken +- stancia +- ల్ +- ▁갔다 +- gefüllt +- 普段 +- ▁speck +- なんだろう +- ▁Вот +- 营地 +- டெ +- gelaufen +- 邻 +- ▁Acest +- 掲げ +- ▁corporal +- おかげで +- 呼んで +- নো +- ▁Fernsehen +- ▁galley +- 天主教 +- ▁precision +- ▁uneasiness +- ▁фотограф +- ▁pēc +- 遵守 +- ▁Again +- ▁kontrollieren +- ▁olabilir +- ▁luce +- ▁rubbing +- bwy +- ▁decoration +- ▁repay +- ▁kullanıl +- ▁immigration +- 毯 +- 蒋 +- ▁volatility +- ▁похоже +- 쪼 +- ▁grievous +- つなげ +- マリウポリ +- 一个地方 +- 進み +- 反复 +- ▁tiên +- ▁Waffe +- ▁высоко +- はありませんでした +- ▁expressive +- 増えている +- 皱 +- 돌 +- ▁نبود +- ▁Dutzend +- と思うんですね +- 对你来说 +- ▁geography +- steigt +- ▁variant +- 助理 +- ▁Vögel +- ▁احساس +- ▁Klin +- ▁residential +- Imperi +- ▁modified +- ▁solange +- ▁filings +- αν +- وظ +- ▁splendor +- ▁Kaffee +- әлә +- eficient +- 豊かな +- ▁معا +- ▁старш +- ▁сложно +- ▁хва +- หา +- πο +- ▁Burg +- ▁федеральн +- ▁лож +- ▁prostitu +- 伤心 +- ▁Howard +- опо +- まらない +- ▁vieux +- хватил +- 巫 +- 吕 +- ▁đúng +- ▁nightingale +- 选项 +- 同士 +- に到着 +- 設定 +- ▁postpone +- ▁нужен +- ▁крут +- 绝不 +- ▁robbery +- ▁Му +- ▁snarl +- ▁cél +- வான +- ▁Anthony +- ▁Krankheiten +- ▁reappear +- あるんですね +- 清醒 +- тянул +- 违反 +- ▁život +- ക്ക +- 琼 +- 防犯カメラ +- Effekt +- ▁elastic +- horaho +- 思え +- мәй +- ▁troy +- てみよう +- سطح +- kombe +- ▁Tanz +- ▁wipe +- 漢 +- ▁cherche +- 粮食 +- ▁držav +- 模様 +- 知識 +- ▁trả +- ▁mold +- ɣur +- ▁softened +- 絡 +- 袁 +- ▁기억 +- 忠诚 +- 预计 +- ▁descendants +- 結び +- 别忘了 +- 还有一些 +- ▁machst +- อบ +- ▁modification +- 挽 +- 刮 +- 不断地 +- klā +- ذكر +- 这两天 +- 
▁Philipp +- 主持 +- ▁sıra +- 上涨 +- kreuz +- ▁அவன் +- трымліва +- ▁ເອົາ +- ▁sprinkle +- ▁hesitating +- 目撃 +- 資料 +- ▁chinesische +- ▁transmission +- ▁trebui +- 风景 +- cloud +- かかった +- 疫 +- ってくれる +- 傾 +- ebɣa +- ▁pregnant +- ▁memorable +- ▁Unterricht +- ▁majestic +- ▁Transport +- ▁abyss +- ▁voce +- 挺好 +- ▁Station +- 主人公 +- لىك +- ganira +- ▁geeignet +- ▁kentucky +- ▁telèfon +- 茨城県 +- 酢 +- よろしくお願いいたします +- ▁begeistert +- にもかかわらず +- ▁profesor +- 清理 +- ▁사실 +- ▁rumour +- ▁Forscher +- ▁cupboard +- 見つけた +- 千万别 +- と思ったら +- ▁нашу +- ▁хэсэг +- 翁 +- キャラクター +- ▁ນະ +- ▁охран +- ordinate +- 考えると +- ▁gelten +- ▁chalk +- ▁пути +- ▁competent +- 赠 +- ▁cecilia +- ▁спокойно +- ▁exempt +- 苦労 +- ▁cambridge +- 美洲 +- ゆっくりと +- ▁краіны +- 減ら +- 下一步 +- ▁cripple +- ▁sunrise +- 没法 +- ▁vincent +- かなと思って +- 毕 +- ரும் +- 平常 +- 祖先 +- ▁நெ +- lood +- 喘 +- ಬ +- ラジオ +- منطقة +- ▁civilian +- 快递 +- ัด +- 仆人 +- ▁liquidity +- ▁Onkel +- 地铁 +- ▁thiết +- 参观 +- 来自于 +- وست +- ▁jelly +- 爸爸妈妈 +- stunde +- 見ている +- 'ON' +- ▁Termin +- ដ +- 嘱 +- ▁hudson +- ▁நிற +- ▁fraction +- গু +- দ্ধ +- 媳妇儿 +- 近づいて +- ứ +- 支出 +- すてきな +- 贺 +- ▁ceremonies +- ▁поддержк +- ▁безопасности +- ▁말이야 +- ▁regió +- ▁obstruct +- ▁mercat +- 转移到 +- 领先 +- 美容 +- даецца +- 活力 +- ី +- ▁shrank +- ▁mañana +- այ +- ▁têm +- NN +- 広い範囲で +- 乐意 +- intensive +- 教団 +- 番号 +- ▁galv +- 약 +- ▁لذلك +- ▁ஆகும் +- ▁дахь +- ▁полковник +- ▁নিয়ে +- 谈恋爱 +- ▁nursery +- ▁flaming +- ▁Darwin +- ▁شکل +- 短期 +- 挫 +- ▁Georgia +- 霊 +- ▁negotiate +- ▁gahunda +- ▁fuerza +- ▁Kapitel +- ▁puritan +- 尊严 +- ▁এখন +- ▁இருக்கும் +- ブロック +- 撒谎 +- লাম +- ▁noticing +- ▁rebuke +- ▁vexed +- 年目 +- ĝoj +- вэр +- 生きる +- ▁выступа +- 赴 +- ▁очевидно +- gång +- 明らか +- liegenden +- 各自 +- зву +- 해가지고 +- ับ +- ▁Yani +- ▁матери +- ▁сделали +- ▁آنجا +- ▁Zunächst +- ▁Пасля +- ▁싶어 +- ▁наук +- ▁جوان +- ▁homoj +- 毛病 +- 几百 +- 重量 +- ޔ +- ▁Lächeln +- ▁vijf +- ▁imperative +- 财政 +- писыва +- 曲げ +- なのかな +- ハリ +- ▁Landschaft +- дорож +- ēju +- につなが +- ▁betroffen +- 貧 +- ▁یافت +- 修改 +- ▁Porque +- 懸 +- แล้ว +- 
▁einschließlich +- ▁jüngste +- ▁übertragen +- ▁государстве +- ▁Мар +- ▁Señor +- եր +- 国民党 +- 目指 +- 可见 +- 闺女 +- 些什么 +- ▁resignation +- weichen +- ▁rusty +- ているということです +- 指南 +- 祖母 +- 侍 +- ▁منطقه +- ▁பின்னர் +- ވާ +- ▁utilization +- ▁nhỏ +- 野蛮 +- ▁Beck +- 我们确实 +- ▁hannah +- 飲んで +- diğini +- ▁зараз +- 虽 +- 全国各地 +- rigg +- ▁düş +- 督 +- ▁Sala +- 併 +- époque +- ▁malgranda +- ▁proclaim +- pferd +- ▁Anzeige +- ▁yardım +- jyanye +- ▁gait +- ନ +- ▁Bemühungen +- 洒 +- 翠 +- ▁싶은 +- 哭泣 +- technik +- 清洁 +- ▁Fac +- temperatur +- 光明 +- ติ +- ▁Lippen +- 僚 +- ばっかり +- ▁Roboter +- ாட்சி +- mahanga +- ▁dreizehn +- 站住 +- ▁Funktionen +- 自衛隊 +- 花费 +- 公布 +- ▁implant +- ▁murray +- 深处 +- ▁partake +- цаў +- 琪 +- ▁excellency +- ▁insignificant +- ご存じ +- ▁especie +- ▁deputy +- দেশ +- ъезд +- 物質 +- ▁verteilt +- ▁spinning +- spirited +- ▁fairies +- ▁Bydd +- ống +- ▁exerc +- ワイン +- yitibwa +- いらっしゃいます +- 喜び +- やってきた +- ▁humil +- ▁kumenya +- ическим +- 作られた +- 八百 +- えっと +- ▁alumni +- ▁отделения +- ▁kulturelle +- ▁headache +- 呼ばれ +- ▁zamanda +- ▁ekster +- වන +- થ +- ใจ +- ▁així +- gebunden +- 行列 +- ▁прошу +- ▁oblige +- ▁тады +- amenye +- 判决 +- 是时候 +- プレッシャー +- ▁Terror +- ▁jordan +- ▁погиб +- ība +- ▁ведае +- 時過ぎ +- ▁Pep +- 转换 +- を持っている +- ▁snug +- ▁долг +- bourne +- という意味 +- 尺 +- ▁Schicksal +- ▁hóa +- ポーランド +- ހު +- agrada +- ▁utilisé +- どれだけ +- ▁желез +- ▁może +- ▁oppression +- 因为它们 +- 自豪 +- 融合 +- schossen +- դ +- ▁grâce +- ▁посмотреть +- 枕 +- ▁foliage +- むしろ +- 损害 +- குதி +- ▁பிரி +- dığını +- ▁restaur +- 婆婆 +- ▁inizia +- ▁Leiter +- 拠 +- 分离 +- 侄 +- ▁geoffrey +- 깐 +- 遵循 +- ▁begrenzt +- riff +- ▁хозяйство +- ▁হবে +- ющей +- ▁Public +- ıyı +- お子さん +- Psycho +- ▁Glen +- 昨夜 +- 様々な +- نامه +- ▁twinkle +- strafe +- ▁healthcare +- 算法 +- ▁worthless +- 遠く +- 乐观 +- ▁vardır +- 著名 +- 象征 +- ▁مادر +- 显得 +- ▁worauf +- ▁چو +- '2011' +- puesta +- 締 +- ▁மிக +- āli +- ▁아빠 +- ▁fathom +- ▁новые +- ▁valiant +- ▁fanatic +- 很快就会 +- бросил +- ▁gusto +- ▁procura +- 傲 +- 喻 +- 火曜日 +- ▁scruple +- ▁unbekannt +- ▁몰라 +- ▁chronic +- 
▁முன்ன +- ▁ziek +- 誇 +- ▁folosi +- juri +- ▁Anruf +- 恼 +- ▁оружие +- ▁eccentric +- ▁inconvenience +- ▁luxurious +- ▁đưa +- ▁benannt +- ▁puedo +- ▁defensive +- 同じような +- ▁звоните +- たりとか +- 艰 +- bearer +- 灌 +- ▁enclosed +- ▁lizzie +- verhältnis +- ▁አይ +- ▁pickwick +- ▁Armut +- ▁nelson +- ▁allemaal +- kumva +- ▁klub +- ▁commencement +- ▁discord +- alphabet +- ▁nördlich +- ポーズ +- 驳 +- ▁đối +- ▁чинь +- ‍ +- ▁südlich +- 郎さん +- ክ +- 項 +- ▁erfüllen +- ▁Что +- ▁головой +- 嘴里 +- ować +- ▁hinweg +- 拉丁 +- ▁самой +- を求めて +- 食べられる +- に当たる +- прашива +- シンプル +- ▁sarebbe +- 职责 +- 模拟 +- 国境 +- ▁다시 +- ▁titan +- テロ +- 藤井 +- builder +- ▁Massachusetts +- ▁gäbe +- ▁먹어 +- ▁сосед +- ▁heritage +- 早晨 +- ▁rappel +- ণে +- ▁ehren +- ▁politika +- ▁facilitate +- 卫星 +- ▁lächeln +- ▁erhöhen +- 严厉 +- おしゃれ +- ▁Pacific +- 康复 +- 暴行 +- と思うんですよね +- ▁prostrate +- 胡子 +- 这时候 +- ஃப் +- ▁antagonist +- ▁фед +- 权威 +- 眼镜 +- ▁Wang +- ▁депутат +- ▁существо +- ▁hubiera +- ლო +- ▁olacak +- 孤立 +- ▁affront +- 予防 +- ▁Susan +- klagen +- ▁parrot +- 日常生活 +- ▁měl +- ▁لطفا +- 茫 +- ▁موضوع +- 栽培 +- ▁Board +- ▁Northern +- しょうゆ +- 市にある +- ▁prosecution +- ▁можешь +- アニメ +- 边界 +- dependence +- американ +- 埋め +- alytic +- ▁animation +- ▁وكان +- 農業 +- 尻 +- จาก +- ラウンド +- ▁magician +- состоя +- ▁freak +- 再一次 +- ▁лидер +- ▁داره +- 子育て +- ▁verbal +- ▁benötigen +- 끔 +- பெயர் +- 貼 +- アイドル +- fleisch +- ▁Point +- ▁پیر +- ▁Branche +- 計算 +- ▁burglar +- খন +- 速い +- ▁furent +- 悪化 +- ▁wholesome +- 普及 +- ▁gaily +- 秘书 +- Produzent +- 悼 +- ▁enforcement +- ות +- 场合 +- 侵入 +- ▁nommé +- ▁아니라 +- ▁oggi +- ▁fiber +- 偉 +- ▁perceiving +- ▁dinosaur +- チャンピオン +- موسيق +- සේ +- yinza +- ▁들어가 +- killer +- ▁plump +- 进攻 +- いったん +- 婦 +- ▁HIV +- ▁haciendo +- ▁немножко +- ▁оппозици +- ▁thereafter +- богат +- سازی +- 会出现 +- ▁écrit +- ▁disappearance +- ▁хаце +- 百姓 +- ▁وهي +- говорил +- ▁prakti +- ต้อง +- ▁nerv +- ▁Kelly +- ▁Ausnahme +- 動く +- σε +- ▁reverend +- ホン +- ▁угодно +- 抄 +- ▁магчыма +- ▁எல்லா +- ▁Erstens +- ▁crag +- ▁машина +- ▁forthwith +- 携带 +- වත් +- 
▁earnestness +- ▁interposed +- ▁представлен +- ▁trẻ +- 記事 +- hati +- ▁stieß +- ▁sponge +- ೇ +- ▁Columbia +- ▁Großbritannien +- ▁федерации +- ничтож +- ▁offense +- Bomb +- 吉姆 +- ێکی +- ▁estudio +- ▁darwin +- ▁viên +- ຸ +- ▁возвраща +- смеш +- 裁判所 +- 吾 +- ▁완전 +- 成千上万 +- ▁abilities +- 関係者によりますと +- 别动 +- 30% +- 武汉 +- ▁craig +- ▁economist +- わけじゃない +- ▁ülke +- ▁fung +- ▁cyose +- ▁herausgefunden +- ▁допустим +- 脑海中 +- ▁맛있 +- ▁دقیق +- ▁Truppen +- 連勝 +- ▁perilous +- 骨头 +- ▁هنوز +- カウント +- ▁unangenehm +- ▁exhort +- ▁heavier +- රා +- 流浪 +- 我爱你 +- 你也可以 +- ▁kijken +- 処分 +- '2012' +- ▁Walter +- ▁reflex +- 関心 +- ▁Teufel +- ▁congratulations +- ▁Dilluns +- 鶴 +- CEO +- ▁Tippett +- ▁achieving +- ▁Business +- roost +- 永久 +- замен +- ▁Clara +- このうち +- 身材 +- ▁junta +- 輸出 +- ▁دیگری +- ▁vendor +- が出ている +- ▁сёння +- 幽默 +- ▁Francis +- ▁regula +- পুর +- 兵士 +- ▁Normal +- sponde +- たらいい +- 段階で +- ▁composer +- ▁Junior +- ▁leonard +- されていて +- ▁Eindruck +- solució +- ▁southwest +- ▁equipo +- ▁Metall +- ▁voters +- வன் +- ▁mosquito +- ▁irgendetwas +- ▁següent +- ▁loại +- င်း +- 現役 +- alisierung +- 穿越 +- ▁fervent +- 描いて +- 电视台 +- nachricht +- 主流 +- 广东 +- waardig +- 필 +- ▁Toronto +- ▁alteration +- ▁diligence +- 閉じ +- との関係 +- государств +- ▁Wilson +- στ +- シア +- мовы +- ▁curr +- тып +- 主演 +- ▁neugierig +- ▁элемент +- ▁vibration +- お弁当 +- 甜蜜 +- ▁nikola +- ▁chacun +- 登记 +- ▁flirt +- ▁rapidity +- ▁pourrait +- ▁ومن +- быстр +- avion +- ব্ +- 几十 +- ▁людзей +- ▁Geschmack +- 構造 +- 日連続で +- が必要な +- 続けた +- כ +- ▁sequential +- ▁whistling +- 垣 +- ▁Gestalt +- ▁그래가지고 +- 交換 +- ▁compose +- праў +- ▁estudiant +- 憧れ +- ▁infernal +- ▁věc +- ▁navigation +- 选民 +- ▁recap +- 享 +- ▁америк +- ▁Hungar +- 天赋 +- ▁emerald +- ▁पनि +- ▁شامل +- ▁Motiv +- ▁aufregend +- 此刻 +- ▁generating +- وى +- ▁вялікі +- ▁оказыва +- myśl +- ▁fácil +- ▁treacherous +- 湘 +- そっち +- ▁harriet +- 雷雨 +- 一瞬 +- ▁pouco +- 特別な +- 智力 +- ගෙන +- ▁hunne +- 绩 +- ▁Emotionen +- ಅ +- に基づ +- 威廉 +- ▁beseech +- ▁dramatically +- 落ち着 +- 非難 +- 見通しです +- ▁срок +- んですけれど +- 
ulira +- プラン +- 搅 +- ▁advisor +- ▁knives +- acağım +- ▁Ла +- ▁警察によりますと +- 几个小时 +- 是正确的 +- ▁schrie +- ரின் +- 改正 +- ▁lyric +- ▁могла +- ବ +- ▁penetration +- ▁Nächste +- ▁být +- atrix +- ساز +- 你为什么不 +- ▁konata +- ётся +- たぶん +- ० +- ▁superficial +- ▁unreasonable +- ▁điểm +- ▁grotesque +- ▁coroner +- ▁Beschreibung +- ▁다음 +- ▁refreshment +- 昭 +- 传达 +- ▁надеюсь +- ំ +- ▁Ontario +- ▁divinity +- ▁vehement +- ▁settling +- 保育 +- ▁лучш +- ▁bekomme +- っていう感じ +- ▁Witz +- 歩き +- ضاء +- diagnose +- 沿岸 +- 衡量 +- げん +- ▁நல்ல +- 改进 +- ிடம் +- ▁большие +- ▁Vä +- ▁Stress +- ▁транс +- ▁dauern +- platte +- ఁ +- १ +- ▁humiliation +- ▁بیرون +- ▁Könnte +- 軍事侵攻 +- ことにしています +- लो +- れん +- டோ +- ждения +- ▁dickens +- 江湖 +- ▁ansieht +- ▁insgesamt +- ▁вещь +- ▁دنبال +- 寒冷 +- ▁lobby +- ハム +- 年ぶり +- 死刑 +- 在接下来的 +- 绘 +- פ +- ▁thú +- ▁millones +- ▁Presse +- コート +- エイ +- 右边 +- entrada +- liselt +- ▁Engagement +- 芋 +- ▁worüber +- ▁regretted +- 首席 +- そうなんだ +- ▁costru +- を決める +- fuß +- റ +- ▁Margaret +- 亭 +- 参议员 +- ▁Nutzen +- sluit +- étend +- gambi +- ▁workshop +- ▁Sprach +- schleunig +- બ +- ▁además +- ▁золот +- 聞かれ +- ▁endowed +- ▁strode +- крыты +- 하면은 +- '4000' +- ▁kommun +- ত্ত +- 总理 +- ▁миллионов +- ▁escaping +- န် +- ウクライナ侵攻 +- ▁முதல் +- ▁Provinz +- ▁Questa +- 有哪些 +- ▁occupant +- ▁rugged +- 調べに対し +- ▁البر +- ▁Gedanke +- 我只是想 +- 篮 +- 贩 +- ঝ +- ▁arguing +- ▁хлеб +- ▁Certain +- を巡り +- ▁پشت +- 生き物 +- ▁parola +- ▁수도 +- 主教 +- 包装 +- 遗传 +- เธอ +- 举办 +- 陥 +- 艦 +- ▁shabby +- 透露 +- скага +- ▁picnic +- ▁construcció +- 占领 +- ▁activist +- işim +- であること +- ▁davvero +- ▁crític +- 珍惜 +- ▁çalışma +- ▁другого +- ▁rogue +- ▁geliyor +- ্ট +- ▁todavía +- ▁ډېر +- ▁गर्न +- おかしい +- ▁действия +- こういうこと +- ին +- ▁примерно +- ▁Greg +- られていた +- 猴子 +- ▁adieu +- ▁готовы +- ▁akzeptieren +- 纱 +- ▁Bewertung +- おそれがあります +- ▁вопросов +- ▁hybrid +- ▁único +- ▁کردیم +- ▁reprit +- escena +- ▁Ҡа +- ▁hoặc +- ▁nostrils +- ▁champagne +- やり方 +- ▁smote +- 圧倒 +- ならば +- ▁babiri +- セカンド +- 才知道 +- 連覇 +- ▁идти +- ▁imposing +- Book +- waarde +- 
yczn +- らっしゃる +- ▁hoorde +- ▁verbessert +- ▁zooveel +- 形容 +- もらいました +- ▁inviting +- іўся +- ▁volcano +- 新潟県 +- ▁eastward +- froid +- ніз +- mauer +- lösung +- ▁undertook +- 伤口 +- ивается +- ▁زمانی +- කට +- イヤー +- 興 +- ▁militärische +- マイナス +- 这部电影 +- ▁trifft +- ▁доктор +- ▁analytics +- ્ર +- 领袖 +- ▁notorious +- ▁piercing +- ▁película +- ▁compara +- ▁молча +- ▁commentary +- ▁Morris +- станци +- チュー +- ▁conscientious +- 坠 +- ▁Stoff +- غۇ +- 对我们来说 +- серд +- 受け止め +- ▁Brüder +- 皇家 +- 提起 +- 検証 +- ގ +- rechnet +- ழை +- ▁yakın +- ▁alluded +- ▁Parlement +- ▁ripple +- ▁trocken +- вуча +- ▁Saison +- といわれて +- ▁Hugh +- 議長 +- 敞 +- શ +- ▁erscheint +- ▁خاطر +- ▁нужна +- sighted +- 做不到 +- ふた +- 向かい +- 分类 +- 見たこと +- ▁comparative +- 翅膀 +- 丛 +- ▁necklace +- 交往 +- 建築 +- 大伙 +- ▁станция +- ▁geraten +- ▁Gebet +- ▁peuple +- ▁weibliche +- 重症者 +- ▁Vortrag +- ▁Раз +- گذار +- ▁acerca +- பை +- してしまった +- どうでしょうか +- ▁জান +- ▁disclosure +- ▁geschieht +- ▁здравствуйте +- ▁apprentice +- ▁Blumen +- シングル +- ▁одним +- 入侵 +- ▁näi +- に関して +- ▁wakati +- ▁качестве +- ғыҙ +- ▁blickte +- ▁anecdote +- ایل +- ▁secund +- ▁سەر +- على +- ▁devout +- 整整 +- 现在正在 +- dzē +- 脆 +- 羡慕 +- ▁Houston +- ▁Erwartung +- ぴったり +- ▁genießen +- ▁شاید +- ▁nombreux +- ▁Bruce +- ▁genus +- 两周 +- パット +- ▁лежа +- gefahren +- ▁آس +- อยู่ +- ▁Pennsylvania +- ▁છે +- 佐々木 +- durchschnittlich +- 机械 +- 晴れて +- ▁humbly +- ▁afecta +- ▁тракт +- 屋根 +- ▁করেন +- 知らせ +- ▁diagram +- ▁evitar +- くなっている +- ▁intuition +- ▁jonathan +- blätter +- ▁default +- ▁measuring +- зван +- ▁어제 +- ▁protocol +- últim +- してくる +- 処理 +- ეს +- 发誓 +- ▁Mount +- autant +- 如果有人 +- ▁миров +- 障 +- ▁анализ +- ▁அழை +- 如果你能 +- 停電 +- 的角度来看 +- வரும் +- 친 +- 戚 +- ▁Präsentation +- ▁Festival +- 伊丽莎白 +- ▁Geräte +- ▁иметь +- ▁cherry +- ▁Vergnügen +- ▁بىلەن +- ▁때문에 +- 淹 +- ヴ +- ▁Portugal +- ▁Crist +- 尤其 +- ▁Major +- の様子です +- 趣味 +- gesteld +- 糊涂 +- 色んな +- ▁اصلی +- ▁Ausschuss +- వు +- ▁pluraj +- 室内 +- ировали +- 構え +- runde +- 棄 +- ▁گوش +- ▁михаил +- 医疗保健 +- ▁waarop +- を決めました +- 体现 +- 
▁voiture +- ▁Ufer +- ▁Route +- もらいます +- ▁tác +- میر +- 拒 +- を果たし +- ▁nachgedacht +- 페 +- ▁комитет +- ▁ມີ +- ▁داستان +- 减肥 +- геҙ +- ジン +- 左边 +- ▁Sorge +- ▁чаго +- ▁incense +- ▁العام +- 旭 +- ▁đại +- ▁بنابراین +- ▁смотреть +- ▁دلیل +- キム +- 말 +- ▁investor +- 塞尔 +- 小伙子 +- 屎 +- ▁Jennifer +- ും +- ▁уровне +- ▁homage +- ▁видели +- 正しい +- ▁laboratori +- に住んで +- ▁illinois +- ▁tiền +- サラダ +- ▁boughs +- ▁russell +- ▁sagst +- 警備 +- zuziehen +- 甘み +- ▁sinister +- ரீ +- ような感じ +- ریک +- 姚 +- ▁Wähler +- ▁columbia +- ▁ekzistas +- ▁perplexed +- ▁братьев +- 渔 +- ▁grill +- ▁exalt +- kontakt +- ▁feit +- ▁governess +- ▁kurya +- ▁Kindheit +- ▁sichtbar +- پتۇ +- เรา +- ▁hump +- ▁پول +- spread +- 愈 +- ▁clumsy +- ▁plutôt +- 취 +- ▁Lewis +- 関東地方 +- ▁каманд +- 哲学家 +- ▁написал +- ▁jazz +- そのとき +- ▁tiel +- よろしく +- ▁Sekunde +- پوش +- 甲子園 +- ▁Widerstand +- jüdische +- ▁pretext +- ▁début +- ▁Standort +- ▁половин +- ▁shovel +- йшлі +- С +- ▁dennoch +- schwäch +- 毒品 +- 救命 +- ▁tiam +- ▁forbear +- ▁convincing +- ▁miraculous +- ▁поговорить +- ▁mugihe +- intérieur +- 睡着了 +- ▁여자 +- 100% +- 하기 +- 修理 +- ruḥ +- 翌日 +- ▁siebzehn +- ▁waistcoat +- 继续前进 +- ▁cuán +- ▁urging +- 給付 +- mıştı +- 茹 +- វ +- ピンチ +- 80% +- 缓慢 +- ▁Ли +- 保密 +- 镜子 +- ▁felix +- あると思います +- つらい +- ▁خودش +- ▁detachment +- ▁prescription +- 貢 +- 都道府県 +- ▁cavalier +- もしかして +- 尼克 +- ▁petersburg +- ▁zunehmend +- đ +- layan +- 哄 +- ō +- ▁zukünftige +- ▁declining +- ▁extern +- சொல் +- 积累 +- っていうのも +- europa +- स्त +- ▁starving +- 祭り +- 呼吁 +- ٌ +- ▁corazón +- ▁сталин +- ▁eugene +- ▁participating +- 做生意 +- ▁condens +- 描かれ +- łow +- できれば +- ▁zacht +- 删除 +- 適用 +- ▁скажи +- ▁Definition +- intérêt +- 滥 +- ▁Một +- schloß +- ▁প্রতি +- 取り組んで +- ▁tolerate +- ▁điện +- ▁auction +- ნე +- ▁Gefangene +- zungu +- ▁hieß +- 董事长 +- ▁calamity +- ▁precipice +- ▁ایجاد +- ▁hoffnung +- ▁nuovo +- 囚犯 +- ▁două +- タイムリー +- 主管 +- ▁Rw +- ▁حسنا +- ▁meditate +- ▁Fakat +- メダルを獲得 +- دعو +- 博客 +- ▁schweigen +- ▁cemetery +- ▁lloyd +- 審査 +- 啤酒 +- 成績 +- nął +- ▁rook +- ▁Association +- 
▁Perezida +- ▁baltimore +- ▁endurance +- 洛杉矶 +- 消耗 +- 物理学 +- ющее +- стреля +- 很多人都 +- 揺 +- ច +- 这辈子 +- ▁Knochen +- ▁και +- ▁видно +- ▁Wirklichkeit +- ▁چشم +- ソフト +- ப்போ +- 弟子 +- 1,000 +- 干什么呀 +- գ +- ▁অব +- ▁unreal +- ▁Kristo +- gewandt +- ▁হয়েছে +- сроч +- ▁volgende +- ▁gelukkig +- 하면서 +- ▁زبان +- ▁arbitr +- ምን +- ▁Davis +- 書いた +- ергә +- ▁marcus +- を持った +- 尉 +- ▁hilfreich +- ▁вместо +- ▁څنګه +- ▁irgend +- ▁gambling +- ▁উপ +- ▁возможности +- просить +- ▁unterscheiden +- ▁feststellen +- ゴー +- 食堂 +- ▁gelebt +- んでしょ +- знания +- ▁estudiar +- زده +- ▁норм +- жир +- ▁shameful +- 열 +- ▁течение +- ▁stammered +- 阴谋 +- уулах +- ▁ransom +- kapital +- ▁franco +- 奈良 +- 顺便 +- ▁slipping +- ığın +- ალ +- ▁wichtigste +- 料金 +- 坛 +- ▁малая +- 属下的一个 +- 谐 +- 박 +- いかがでしょうか +- ▁ماشین +- 読んで +- ▁шестой +- Tabelle +- ۲ +- ▁algún +- ▁unanimous +- ▁thống +- ▁skupin +- 暮 +- やめて +- 曇り +- писал +- 驶 +- ▁fidelity +- ▁pouvons +- ▁mondiale +- 速報 +- ▁Überleben +- 離れて +- lardı +- ▁quitted +- ぴ +- 販 +- ▁느낌 +- ▁воздух +- ▁patriarch +- 沙漠 +- ▁развива +- глядзе +- 優先 +- ▁Má +- தான +- 你怎么知道 +- ▁dispense +- 変えて +- ီ +- 鸦 +- ▁Eigentum +- ▁discouraged +- 這 +- こういうふうに +- 阶级 +- せば +- ▁leitet +- theorie +- ▁cultivation +- leihen +- eceğim +- 巨大な +- ▁Lektion +- มาก +- らせる +- торгов +- ▁Empfehlung +- ▁celestial +- ▁occidental +- алтай +- ▁athletic +- 桁 +- affaire +- ものすごい +- ▁civilisation +- ▁اتاق +- 这几天 +- ▁Europäische +- 注定 +- 该地区 +- 析 +- 掏 +- ▁Mitgliedstaaten +- ▁recognizing +- 력 +- ▁казалось +- ▁Sturm +- パーク +- рабатыва +- ▁военно +- ▁sentinel +- どうしよう +- ▁spike +- 良心 +- временно +- ຖ +- 甄 +- 抛弃 +- ▁получить +- ▁abgeschlossen +- 伍德 +- 残念 +- ▁collector +- ▁микро +- ▁joshua +- ▁период +- ့ +- ▁பெரிய +- Source +- ющего +- くなっています +- ▁astronomi +- 汇报 +- 復活 +- țele +- デモ +- хир +- 仙台 +- 囊 +- 舱 +- ▁coincidence +- ▁compromis +- メイン +- inspiring +- ▁politeness +- 碧 +- ▁வழங்க +- 扉 +- ▁pudding +- ▁baptist +- ▁Vull +- ▁epoch +- ▁combusti +- entwicklung +- ▁дней +- 负面 +- 帰国 +- ▁байгуул +- ▁Nachbarn +- ▁대학 +- espér 
+- ▁Disney +- спя +- ডি +- なじみ +- ▁Bedürfnisse +- 极其 +- ▁بسیاری +- ▁zurückkommen +- うどん +- 悄悄 +- ▁зохио +- メダリスト +- ▁kesk +- ▁possono +- 棕色 +- 総理大臣 +- වෙන +- nummer +- 異なる +- 城堡 +- ғына +- ▁relaciona +- ▁hobby +- ▁людьми +- mektedir +- ▁caballero +- ▁του +- ▁ہے۔ +- ▁адрес +- ▁никакой +- ▁باشه +- ▁durchaus +- ▁außen +- ▁politician +- höchste +- を行いました +- عامل +- ણ +- ቀ +- যোগ +- 六个月 +- ▁sophia +- endroit +- どうでしょう +- ێر +- ファー +- је +- فضل +- 感染症 +- 让我们看看 +- 屋顶 +- 飛車 +- ▁ذات +- drž +- 泼 +- asanzwe +- ား +- 厚生労働省 +- ▁dungeon +- ▁جيد +- 押さえ +- ▁vollkommen +- క్క +- zwingen +- ведения +- ▁Mühe +- ▁seneng +- нести +- 幼儿园 +- 磅 +- 腔 +- 烦恼 +- ▁Fahrzeug +- 眼神 +- ▁чисто +- ▁далей +- を迎える +- ▁sexuelle +- オリジナル +- 马丁 +- ▁aufbauen +- ausschuss +- における +- 周囲 +- 狮子 +- できるだけ +- gegriffen +- ▁langue +- ウクライナ軍 +- ▁herzlich +- ▁suffi +- ▁İki +- ▁gehst +- 苦しい +- توانیم +- 塑料 +- ▁chơi +- ▁khó +- ▁არა +- ▁самые +- ▁tedious +- 感染状況 +- ước +- ▁întreb +- 每一次 +- 岭 +- ▁Vậy +- ▁discomfort +- ▁настолько +- 捐赠 +- ▁капитан +- konsum +- ▁رجل +- ポリ +- ходили +- ▁다니 +- ▁économique +- 敗れ +- génér +- ▁Cross +- ルート +- lumina +- 吸收 +- ▁косм +- 假期 +- klapp +- 验证 +- ▁Fond +- ▁bizim +- ▁portuguese +- ▁rubbish +- 복 +- oubli +- 干吗呀 +- ▁хот +- 运气 +- тыўна +- ▁இப்ப +- ▁kanggo +- ெட் +- 寡 +- ▁sanctuary +- 써 +- よろしい +- 邸 +- ▁tradicional +- ▁bandage +- ▁ukrain +- 渡辺 +- ミニ +- というわけで +- ▁mängi +- ちゃいます +- ▁рух +- abash +- ▁Gedächtnis +- ▁pièce +- 医療機関 +- ▁immune +- 火災 +- ▁forlorn +- genossen +- ▁хоча +- räsentiert +- ▁horribly +- ▁безусловно +- ▁кӱ +- ▁семьсот +- ▁sweetheart +- ▁مؤ +- 解雇 +- 涉及到 +- ▁воды +- 況 +- 塘 +- ▁harvard +- 罰 +- ▁Speicher +- ▁benedict +- ▁fellowship +- 在这方面 +- 英文 +- ▁pronounce +- ţ +- ▁proprie +- ▁болсон +- リュ +- ▁celebration +- ▁Güte +- 正在进行 +- 蔽 +- 청 +- 膝盖 +- ▁радио +- ▁rustic +- ▁общество +- ▁pulpit +- ▁Fußball +- ▁Josep +- cliffe +- ▁தெரி +- 現代 +- podobn +- ▁fascination +- ▁który +- ▁devait +- ▁دهند +- 再現 +- ▁geographical +- 变革 +- ▁мисс +- 史密斯 +- ニック +- ▁Egypt +- строить +- 精神病 +- ▁모르겠 +- 
▁русско +- gegenwärtig +- 둘 +- 씩 +- ▁ĉirkaŭ +- 喫 +- एको +- 神话 +- ▁titul +- ▁Träume +- ▁আস +- 厳しく +- ▁dazzling +- ▁erwiderte +- ▁Überraschung +- ▁gedanken +- 增强 +- 基督 +- ાર +- ▁luggage +- ▁이번에 +- täuscht +- ড়ে +- ▁mwiza +- әҙер +- щим +- ▁marvelous +- 入り口 +- wahrscheinlich +- 述べ +- ▁velmi +- ർ +- ▁Londres +- ▁تولید +- ▁Sonntag +- ▁hôm +- 腐败 +- 갖고 +- 承認 +- 考验 +- ▁Chu +- ▁aisle +- ▁beauties +- ambigu +- ぼう +- ▁hippo +- 霉 +- ▁overlooked +- ▁Takže +- ▁moisture +- తా +- ▁hoàn +- ってみよう +- 太平洋側 +- ▁cultivate +- ▁wobei +- ▁ecclesiastical +- édé +- 爱尔兰 +- ▁пространств +- おばあちゃん +- ▁Training +- పో +- 餐馆 +- ▁dripping +- geschenk +- ▁auditor +- ▁unequal +- amatu +- 白宫 +- mutima +- ▁fisherman +- 疆 +- ないですか +- ▁drake +- 判定 +- ▁disseny +- kungu +- 買った +- ▁troublesome +- Blanc +- ▁доступ +- 证实 +- ▁mẹ +- ▁மனித +- ▁Vermögen +- 급 +- 引擎 +- づらい +- Unis +- ▁Anspruch +- 房屋 +- 引退 +- ▁борис +- માં +- ▁Kategorie +- ▁зусім +- 잡 +- 娅 +- ▁Spanish +- ▁thanksgiving +- లా +- ▁хороший +- ▁honom +- 隐私 +- ▁flip +- ▁occurring +- ▁Ereignis +- wheel +- ウォー +- 生み出 +- だと思うんです +- quote +- ▁Despite +- まいりましょう +- tumye +- ▁humorous +- 女優 +- ▁Voraus +- ってしまった +- ▁Köpfe +- 最終的に +- ▁rhyme +- ▁clump +- ▁видеть +- ▁luckily +- ▁رنگ +- ▁malaria +- 生涯 +- патрэб +- ప్ప +- ▁Foundation +- ▁secrecy +- 谜 +- 纤 +- ŝanĝ +- ▁Provi +- ▁дүр +- ご覧いただ +- беҙ +- ▁audit +- ▁spice +- ▁süd +- கல +- 吸引力 +- 色々 +- ▁drüben +- ▁schüttelte +- ▁completamente +- 決断 +- ▁думать +- 励 +- 塑 +- ▁novelty +- 龟 +- ▁Flügel +- ▁diferencia +- ▁cristian +- ▁urmă +- ▁jesuit +- ▁fördern +- َلَ +- と述べました +- 晴れる +- を発表しました +- 言える +- 陌生 +- ▁medication +- 季度 +- ▁lächelte +- 綱 +- ున్నా +- gratul +- ▁моего +- سې +- ▁اين +- 兄弟们 +- ▁ئەوە +- 卿 +- 荒谬 +- 弘 +- ▁제일 +- ▁будешь +- 适用于 +- 食べたい +- 40% +- ▁Ре +- 场比赛 +- ▁xả +- ▁kostet +- ▁bố +- Werbung +- ▁Academy +- ▁mohammed +- ▁آمریکا +- 唐纳德 +- 罗伯特 +- ▁помощью +- 自殺 +- ▁Vì +- ▁Rauch +- 剧院 +- ▁curate +- ▁Moore +- を越え +- 一件事情 +- ▁алма +- ▁distint +- ▁absolv +- ▁deceit +- ▁propriety +- 怪物 +- 根本就不 +- 度目の +- ▁Entfernung +- 
▁Après +- ▁signifas +- 日ざし +- ▁Pläne +- ▁Nerven +- ▁güzel +- ビッグ +- ▁hydrogen +- ▁winzige +- farben +- 鞭 +- ▁thompson +- ঙ্গ +- ▁griechische +- ▁хөгж +- 観察 +- sloten +- ▁chunk +- ▁installation +- クリア +- 汚 +- 빠 +- ុ +- ▁elapsed +- ▁Government +- 毁灭 +- ▁rapture +- しさを +- ▁Symbol +- 不思議な +- gestiegen +- ▁practise +- ▁athens +- ▁recreation +- պ +- 猿 +- ▁موجود +- 現象 +- ▁крайней +- ▁morbid +- 感冒 +- ждение +- 引入 +- ▁делают +- ▁moustache +- 洗澡 +- ▁되는데 +- 選び +- flipp +- ならではの +- ▁chemist +- ▁rustle +- ▁Student +- ղ +- ▁longtemps +- ▁verschwunden +- 讽刺 +- ▁sturdy +- 游客 +- ▁arising +- ▁irritated +- ダム +- ▁твой +- عالم +- ▁heroine +- 見たい +- 税收 +- オレンジ +- ▁üret +- 耕 +- 恥ずかし +- ▁왜냐면 +- ▁sidewalk +- ▁никому +- ▁protože +- ▁involving +- ▁alguma +- krebs +- бель +- ▁geplant +- ść +- нюю +- torium +- ▁abnormal +- ▁condescend +- 滤 +- 参议院 +- 別れ +- ▁Cousin +- ▁kansas +- ▁contributing +- ▁ஆன் +- ▁волос +- 带领 +- ▁constantinople +- ▁inasmuch +- ում +- ▁Ähnlich +- 分かりません +- ▁spiral +- ▁tradici +- 追いかけ +- 睇 +- 水曜日 +- ވެސް +- ▁reassure +- ▁Melbourne +- ▁gegründet +- ▁водо +- 北京オリンピック +- ▁voyez +- 介入 +- 获胜 +- ▁سمت +- 虑 +- 溢 +- ▁která +- verwandt +- ▁බව +- ピンク +- میل +- ▁Kommentar +- 漬け +- lassung +- エビ +- 嫉妒 +- ▁Klimawandel +- ▁солдат +- 指责 +- ▁основан +- ▁implies +- 排名 +- 飼 +- ▁Rechnung +- ▁monotonous +- ▁spēlē +- ▁medieval +- ▁rhetoric +- ▁detached +- ▁Mountain +- ▁isolation +- ▁войск +- 解决这个问题 +- ▁хочется +- articula +- ▁umutima +- ▁curb +- ▁liefern +- 伯格 +- ▁Multi +- ▁russische +- 矮 +- ▁Zuhause +- ▁malcolm +- ▁nevoie +- läufig +- ▁shelves +- 落ち着いて +- ▁смерти +- ▁feudal +- 澤さん +- あえて +- klassi +- ▁constituent +- ▁другая +- ▁rabbi +- ライス +- ▁Sklaven +- ▁الواقع +- まもなく +- ▁dictator +- 丫头 +- 荷兰 +- ▁சீ +- ▁salmon +- ▁flores +- っていうところ +- 初期 +- 卧 +- ▁hypothe +- ▁entfernen +- ▁insolent +- 不合适 +- ▁মানে +- ρα +- ▁Dacă +- ▁Montag +- ▁чуж +- рожа +- 準 +- ▁Folie +- 慈 +- 抖 +- ▁Colorado +- ▁Nutzung +- ▁cognitive +- ▁după +- مدرسة +- 残忍 +- 요일 +- sexuell +- ▁القر +- 尋 +- ▁kemudian +- ▁persuasion +- 狄 +- 
줘 +- お客様 +- ▁jefferson +- ▁pivot +- ▁машины +- نقل +- ▁استخدام +- ބަ +- ▁whipped +- ▁geleden +- ▁escucha +- ▁Could +- ▁остров +- ▁cavall +- quê +- 须 +- ជ +- ▁versteht +- 拭 +- interesse +- ▁eksp +- ▁disappoint +- ▁även +- 钩 +- katholisch +- ▁کودک +- 紫色 +- ▁cố +- 代の男性 +- Time +- 收益 +- ▁чад +- ▁elevat +- ▁hacía +- ▁Alkohol +- 蔑 +- 褒 +- 从那以后 +- ▁извините +- ▁тело +- ▁spirituelle +- யார் +- ▁sünd +- 设法 +- ▁geographic +- 獲 +- முக +- ▁impamvu +- ▁хэдэн +- ▁Physik +- ▁umfasst +- 反抗 +- ▁concede +- ▁обща +- ▁zwang +- 取引 +- ministerium +- 蹲 +- ০ +- ▁Prüfung +- ▁embedded +- ▁possiamo +- タクシー +- ▁пятая +- 出てきました +- ▁discrimina +- ェ +- 能做到 +- ▁clenched +- ▁pequeña +- ▁tennessee +- 哮 +- 煎 +- かっこいい +- ▁请播放 +- ▁așa +- ▁Jemand +- gehoben +- befehl +- ▁mildred +- ▁disrupt +- mètre +- コスト +- ▁нашим +- ใ +- 膝 +- ▁ausgewählt +- ▁nécessaire +- ▁eclipse +- 换句话说 +- 丼 +- 誠 +- جنس +- ▁daisy +- ▁dzīv +- மும் +- 納得 +- kräftig +- ▁merupakan +- コラボ +- 記者会見 +- 收购 +- ▁crusade +- 金額 +- höhe +- ایە +- ▁hína +- ▁snuff +- ▁социал +- möglichkeit +- ijoro +- ▁clamor +- 順位 +- ▁husk +- ▁Flüchtling +- 奥巴马 +- ▁duidelijk +- ▁குறை +- twenty +- ▁discut +- ▁пройд +- お待ち +- 发动机 +- 警官 +- ତ +- ▁aufzubauen +- 这个样子 +- 넘 +- ▁сергей +- 愛知県 +- ▁shroud +- コンビニ +- 行方不明 +- ▁люблю +- ▁rusange +- ▁твои +- となりそうです +- 訴 +- ▁augustine +- чёт +- ものすごく +- ▁Society +- 动态 +- ▁vierundzwanzig +- ▁ocurr +- ▁mitigate +- konomi +- ▁testify +- ▁чист +- ▁имеют +- ▁destructive +- ▁историю +- ▁мальчик +- ▁alcuni +- 渠道 +- 砂糖 +- ▁besagt +- علام +- waffen +- 原則 +- ▁agafar +- ▁supposition +- ▁diplomatic +- ▁marshall +- fünfhundert +- ▁pilgrimage +- 今のところ +- ālā +- ▁legitim +- ித்தார் +- ▁мысли +- ሚ +- ▁securities +- 辖 +- ▁bestätigt +- umukobwa +- ▁Therefore +- 軟 +- ▁problém +- 並ぶ +- ҙың +- ▁Họ +- 寄付 +- ченко +- gerissen +- ▁hâ +- ▁компьютер +- ▁முக்கிய +- こんなふうに +- 媛 +- 놓 +- ▁vigour +- ▁apparition +- ▁imbaraga +- ▁gratified +- symmetri +- ▁clash +- ▁milestone +- hundertfünf +- ▁Doctor +- ლებ +- ▁camí +- عيد +- 鸿 +- ▁disponible +- ▁butterfly +- 
▁teenager +- ▁carlyle +- ▁Mereka +- ▁хэв +- ступил +- とみられています +- entendre +- 棘 +- ▁enabling +- ▁faltered +- ▁bizarre +- ▁nodding +- präg +- 重症 +- ▁cartoon +- ▁hareket +- とみられます +- ▁Republik +- பாடு +- ▁width +- ▁риск +- ▁Runde +- ẫ +- ڈ +- ▁zugänglich +- ျ +- 詩 +- مجتمع +- ゲット +- 散步 +- dämm +- ▁eigenlijk +- 菓子 +- 飛んで +- 级别 +- 厌恶 +- 齿 +- ▁ingenuity +- ▁последний +- ▁руководств +- ▁NASA +- ▁muffled +- ▁theoretical +- ▁நிறுவ +- 女孩儿 +- ▁shilling +- ▁ethnic +- ▁сделан +- 合わせた +- క్ +- счастлив +- 茅 +- ▁Indonesia +- ▁مدرسه +- ▁정말 +- ▁Fehl +- 豪華 +- ▁chestnut +- 伙计们 +- が止ま +- 新宿 +- ▁பெரு +- 了解更多 +- ▁серьезно +- 績 +- 面试 +- 结局 +- pflege +- Á +- թ +- ▁crouched +- フルーツ +- 최 +- ▁espacio +- خصوص +- ▁eindelijk +- 悲剧 +- zauber +- physiologi +- ▁подготов +- ▁лепш +- 一个例子 +- 远处 +- 合計 +- చ్చ +- 赎 +- ຢ +- ▁ecstasy +- ▁نمایش +- ӹн +- 実態 +- rechnen +- менять +- ▁новых +- 竟 +- ▁landlady +- 繊 +- ▁gutanga +- ▁ominous +- ▁voulu +- 証言 +- 演示 +- ▁junk +- ▁integrate +- 隠れ +- ▁Außer +- ▁Entschuldigung +- ▁Kommissar +- 扫描 +- ▁hoofs +- wäss +- اگه +- ▁лица +- 嘿 +- ▁merchandise +- ▁செயல் +- ▁elkander +- ▁spüren +- ▁clown +- ▁важна +- ▁calculate +- 収入 +- 続けている +- 灵活 +- ▁தனி +- ▁fügte +- ▁взаимо +- ▁갑자기 +- ▁Republikaner +- ▁infidel +- ▁desolation +- ▁längst +- 坚强 +- ▁cicl +- 行きたい +- lehrer +- ▁literal +- 韓 +- 非常有趣 +- ▁marcel +- ▁Intelligenz +- 冈 +- ▁пошли +- 气候变化 +- が必要だ +- ▁странно +- με +- 种族主义 +- の疑いで +- ▁yahweh +- 斥 +- 至关重要 +- ▁Kämpfe +- ▁detained +- ▁هنر +- ▁sovint +- ▁syllable +- ▁mittlere +- schalt +- aufnahme +- トルコ +- ▁цели +- ▁judith +- ▁spacious +- 海滩 +- کۆ +- ▁yazı +- ▁भए +- ▁Minnesota +- ▁использовать +- ▁languid +- ▁آورد +- ▁reiterate +- ▁Patrick +- ▁убива +- ▁توجه +- Europa +- ▁تواند +- 崎さん +- ▁Richtlinie +- ▁kibazo +- ▁potenziell +- ▁deferred +- ▁பிறகு +- ނެ +- ▁usurp +- 羽毛 +- schwor +- نوشت +- ▁appoint +- ▁sancho +- ▁குழந்தை +- ▁Үүний +- ▁línea +- ▁Studium +- ▁Ireland +- ▁Modern +- 病床 +- льныя +- ▁кровь +- 査 +- 心疼 +- 렸 +- ▁يوجد +- owski +- ▁konkret +- ▁பற்றி +- ▁categori +- ▁نقش +- 
дзь +- 炼 +- ▁நிகழ் +- ▁indicating +- ▁Gegenteil +- ▁Emily +- ▁война +- 行われている +- ▁presidential +- ▁Little +- கொள் +- 肤 +- ▁Existenz +- 拜访 +- ▁antony +- ▁Samuel +- 見つかり +- ▁může +- 垒 +- 慷慨 +- ▁Ernährung +- ▁displeasure +- ッグ +- 捉 +- ▁говорим +- ▁değiştir +- 必然 +- ▁condicion +- ▁welsh +- 拜拜 +- 失业 +- ▁sparrow +- アピール +- ▁sociedad +- ދަ +- ދު +- ▁사람들이 +- ▁mercado +- 見つけ +- 文書 +- ▁Auftrag +- ▁Annahme +- ▁Coast +- ներ +- 霜 +- ▁boyunca +- 時半ごろ +- ▁asteroid +- ▁коротк +- ▁قانون +- ிற்கு +- nshingano +- ▁musí +- 边境 +- ▁riddle +- 伍 +- 謝 +- ▁Illinois +- ▁دانشگاه +- schließt +- ▁पु +- イラン +- ▁ĉu +- ミング +- ジョー +- ▁comunidad +- ▁companionship +- 轻易 +- qqim +- ▁portray +- ▁үнэ +- ёшь +- 好奇心 +- になりたい +- 虾 +- bitekerezo +- ▁Pēc +- ▁antiquity +- ▁científic +- 淋 +- アルバム +- ▁acontece +- おととし +- 師匠 +- gemeinschaft +- 看一看 +- きょう午前 +- 抜いて +- 谢谢大家 +- ▁bırak +- versorgung +- ▁Konferenz +- GPS +- ▁혼자 +- ▁gracias +- ▁athenian +- ▁persoon +- ▁inaugur +- ▁менш +- 群众 +- ▁northeast +- ▁vorgeschlagen +- 雨が降って +- ▁patriotism +- 档案 +- luğu +- ໃຫ້ +- ▁defiant +- ▁malicious +- ▁Kansas +- ▁chaplain +- 残酷 +- ▁bertram +- 交付 +- 消化 +- ▁Felsen +- ▁bathroom +- おじさん +- ▁байр +- ▁palju +- ▁Alpha +- ▁Katastrophe +- ▁Respekt +- ▁обязан +- ツイッター +- ▁oyna +- ▁غذا +- ▁أفضل +- ▁لدينا +- filtr +- 层次 +- 主导 +- 속 +- ጥ +- ▁sophisticated +- ▁буквально +- ▁façana +- 侮辱 +- ▁дороги +- 前两天 +- 将棋 +- ▁너는 +- ريخ +- ුරු +- 労 +- 戴安娜 +- 逝 +- ▁سیستم +- ̃ +- 剥夺 +- ▁ensued +- 这是一件 +- schwimm +- 是不可能的 +- 辞め +- ▁pueda +- església +- süchtig +- ▁birçok +- 出租车 +- 귀 +- 麗 +- 적으로 +- 注射 +- schränkung +- ▁snare +- ▁skate +- ▁retard +- 一生懸命 +- ▁ängstlich +- ▁vegada +- ডা +- あるんですか +- 起訴 +- räte +- 徽 +- ▁Posteriorment +- ▁nyinshi +- ▁trabalho +- 코 +- ▁చె +- ▁Больш +- ロンドン +- ▁verwandelt +- ▁bagian +- 設計 +- zugreifen +- ▁பழ +- 语音 +- ▁naples +- 世の中 +- 括 +- 嚟 +- ఫ +- 挖掘 +- 世帯 +- ▁scottish +- 見直し +- ركز +- 現れた +- ▁Stich +- ▁refinement +- ▁keith +- 老虎 +- бега +- ▁temporarily +- コンピュータ +- 腸 +- 昆虫 +- アート +- '1.5' +- 維 +- ▁straightforward +- ঐ +- 凌晨 +- 
繁殖 +- ধি +- beamte +- ▁руках +- ▁задерж +- ▁остава +- 積極的に +- 匆 +- ▁Interessant +- ▁законопроект +- liśmy +- ▁cantonada +- ▁مرکز +- 杭州 +- ▁verurteilt +- 噴火 +- ▁адказ +- ▁Nacional +- expliqu +- ▁rebuild +- people +- ıyordu +- 눈 +- 설 +- キッチン +- ▁لطفاً +- ▁decât +- ▁일본 +- ▁prodigious +- ▁Jacob +- 散歩 +- 传奇 +- ▁klassische +- ▁существу +- ▁форма +- 釣り +- ▁подпис +- 风暴 +- ▁Opera +- ▁институт +- ическом +- ▁michigan +- バッグ +- ▁clinton +- それこそ +- весел +- 商業 +- ハンド +- ipps +- ▁spouse +- ▁trustee +- ▁площад +- ▁uzata +- صاب +- geheimnis +- 披 +- ച +- ▁sorgfältig +- 현 +- แต่ +- ▁discreet +- chirurg +- といわれる +- ുന്ന +- ▁возраст +- ▁birkaç +- schirm +- 环节 +- ▁intact +- ▁Então +- طبق +- 亨 +- forderung +- 階段 +- 教导 +- auftrag +- kümme +- 所需的 +- ▁Jimmy +- ▁kümmert +- 대로 +- ▁aquellos +- እ +- ▁susceptible +- 痕迹 +- ▁fuerte +- トレー +- ▁invece +- ложения +- 静岡 +- kündigt +- ▁hoffentlich +- ▁audible +- 학년 +- ▁Finanzierung +- າມ +- ▁simbol +- rätt +- ாலும் +- ▁سخت +- ▁ĉefa +- ▁veröffentlichen +- ▁медицин +- ▁دوباره +- アスリート +- ▁건데 +- ordination +- あぁ +- ▁utawa +- 判明 +- マウンド +- 木曜日 +- PCR +- ▁produzieren +- ▁tactics +- 可能性もある +- ிங் +- هدف +- artista +- 違った +- 弊 +- ▁bijzonder +- ▁nghệ +- ▁boulder +- 逃避 +- 减轻 +- 唉呀 +- ▁Einfach +- ▁Hütte +- ▁Feli +- ▁Charlie +- 反発 +- ▁navigate +- 極めて +- ▁дожд +- ▁забыл +- ▁bourgeois +- ▁steadfast +- 졌 +- ሆ +- ▁voulez +- ▁силы +- فروش +- ▁Chief +- 想一想 +- າກ +- 항 +- 芸術 +- ▁सु +- ▁implicit +- ▁duncan +- ▁واحدة +- ▁humming +- muster +- 装备 +- ▁membership +- کشید +- ▁bequem +- ▁vieille +- ▁begleitet +- ▁empfind +- ▁果啲 +- ▁impulsive +- ▁அரச +- ▁позвонил +- ▁düster +- ▁bunlar +- ▁Offizier +- ▁دغه +- 贫穷 +- ▁করতে +- 多长时间 +- 赞美 +- ▁boost +- باد +- ▁успел +- 没见过 +- 変わらない +- 肾 +- ▁industrious +- ▁конфликт +- ▁беларускай +- ▁carrière +- ▁zgod +- ▁renounce +- 股份 +- глянул +- faktor +- 臨時 +- 拜托 +- building +- ▁demselben +- ▁Spiegel +- ▁enchanted +- ▁그럴 +- 抚养 +- ▁아예 +- ▁conserve +- 姐夫 +- ▁erwähnen +- ▁influential +- ▁первого +- ▁кандидат +- vermögen +- ▁penitent +- 受益 +- 
▁wiederholen +- atangiye +- க்காக +- 面积 +- ▁aconsegui +- ▁columbus +- ▁verpflichtet +- 貫 +- ▁tournament +- 令人惊讶 +- ▁hinterlassen +- ▁servicio +- ▁словно +- 地板上 +- ىرى +- シート +- キーウ +- 训 +- ਜ +- ▁Ayrıca +- ▁bình +- ▁resembling +- ▁birlikte +- ▁আমার +- ▁vienna +- ▁retiring +- ▁Yagize +- 贵族 +- ▁mnoh +- 宿舍 +- 成为一名 +- 投降 +- ▁Zahn +- ▁Ня +- عيش +- ▁fritz +- крыл +- ▁execu +- 乏 +- 瓷 +- ס +- ▁gobierno +- ▁westminster +- ▁Усё +- 책 +- ▁temporada +- 隙 +- 昇 +- ಾಗ +- ▁мужик +- ლე +- լ +- ▁llavors +- シュー +- 教師 +- ランチ +- ▁тэг +- сурс +- 早已 +- ▁Bridge +- ▁geleistet +- ▁mỗi +- 水準 +- ▁되지 +- ▁triangle +- ▁fuori +- 玛丽拉 +- изова +- ▁Dichter +- 異常 +- 炸弹 +- ▁elevator +- ▁gateway +- ルーム +- 观念 +- ▁signify +- ▁distraction +- 推广 +- equilibri +- ▁wunderschön +- 哑 +- 弗兰克 +- 谣 +- ▁مكان +- 疑惑 +- ▁Bürgermeister +- ▁beetle +- রাজ +- ことはない +- verbrauch +- 風景 +- ▁kaldı +- zusammenarbeiten +- ▁appelé +- kandida +- ▁compost +- ▁заўсёды +- ▁степени +- ▁erstaunt +- ▁tödlich +- 对他来说 +- ▁seguro +- ッカー +- 怜 +- を対象に +- τι +- झ +- ▁requiring +- indirimbo +- ▁gufata +- förmig +- ▁thrice +- ▁piteous +- espai +- 百六十 +- 背叛 +- ▁برخی +- チェン +- ▁Prinz +- 暴風 +- ळ +- ▁развития +- ▁хүрэ +- ▁Firma +- 报仇 +- ▁chuckled +- ▁sacrifi +- されていない +- お疲れさま +- ▁Experten +- ▁республик +- ▁peninsula +- 乗用車 +- ▁좋아하 +- ▁parliamentary +- ල්ල +- ாமல் +- 简短 +- ▁forfeit +- ङ +- پذیر +- 畳 +- 冷蔵庫 +- ▁rôle +- بناء +- ▁Summe +- ▁любим +- ▁spars +- ▁konkur +- ើ +- ែ +- ੋ +- ▁Així +- ▁lấy +- ▁않았 +- 汀 +- డి +- 打败 +- ▁unendlich +- ▁гости +- ▁сабе +- ▁tehnolo +- بێت +- ▁posibil +- 揮 +- 逢 +- ▁chuyển +- 眞 +- ▁Kennedy +- ▁miliard +- ▁эфир +- ọ́ +- ▁метод +- なりません +- schäf +- ▁роль +- 这项工作 +- ېرى +- 虐 +- 恭 +- ▁Ukraine +- ▁gratification +- ▁सं +- ěl +- 另一件事 +- ▁teilweise +- 新潟 +- 並べ +- こいつ +- ġ +- ▁কিছু +- 태 +- ▁perchance +- グッズ +- ▁transplant +- ▁impartial +- 入ってる +- 小さく +- んねん +- 的一件事是 +- ▁lehnte +- ▁distingu +- ▁metropolitan +- 처럼 +- ▁gegessen +- 呈 +- ▁trouvé +- ▁recurring +- お菓子 +- ▁ຫຍັງ +- ホワイト +- 담 +- 兜 +- આ +- 阪 +- 塌 +- 锡 +- ढ +- २ +- 扛 +- ỳ +- 
雌 +- 忽 +- 偿 +- И +- 捧 +- 釈 +- 滨 +- ሄ +- 娇 +- ូ +- 铭 +- 滩 +- ャ +- ύ +- ޯ +- 斌 +- 절 +- 종 +- 託 +- ޫ +- 缶 +- 崖 +- ദ +- 潰 +- 緊 +- ɗ +- 蔓 +- 仑 +- ஈ +- 브 +- ৎ +- 厦 +- 扰 +- អ +- 벌 +- 증 +- ২ +- ਵ +- 骚 +- 吨 +- 歓 +- 竖 +- 址 +- 瞥 +- ള +- 渋 +- 挪 +- 暇 +- 掛 +- յ +- 铅 +- 钓 +- 橡 +- 拡 +- 狐 +- 줬 +- 출 +- ٽ +- এ +- 柿 +- 络 +- 乙 +- ቃ +- 幾 +- 亜 +- 嗅 +- 咕 +- 喔 +- 畜 +- 茄 +- 글 +- ጠ +- Б +- 學 +- 플 +- 勉 +- 咸 +- 锤 +- ð +- 건 +- 능 +- ఉ +- 歧 +- 叨 +- ҿ +- 烫 +- 坤 +- 芦 +- $ +- 監 +- គ +- 践 +- 침 +- 蟹 +- 唠 +- 합 +- ຜ +- 堤 +- 肘 +- 宪 +- 임 +- 梳 +- 霞 +- 薛 +- 병 +- 순 +- 攀 +- 驴 +- 灣 +- 甩 +- 邊 +- 妄 +- 론 +- 軸 +- 메 +- 脈 +- 튼 +- 蓉 +- 赐 +- ਮ +- 葱 +- 魅 +- 盼 +- ံ +- 秃 +- 甭 +- է +- 란 +- ぺ +- ጣ +- ৱ +- 窟 +- 靖 +- 颈 +- 壶 +- 꾸 +- 앞 +- ኛ +- 浆 +- ঙ +- 仆 +- Д +- 叉 +- 亀 +- 猴 +- 茎 +- 삼 +- 嬉 +- 澳 +- 颁 +- 笛 +- 커 +- 稻 +- 엔 +- 筛 +- 魁 +- 閣 +- 渣 +- 蹴 +- 搏 +- ੰ +- 肆 +- ں +- ਆ +- 蛙 +- 磊 +- ସ +- 點 +- 汰 +- 棟 +- 陕 +- 憎 +- 绣 +- 歪 +- 頻 +- 趟 +- 岂 +- О +- ވ +- 胞 +- ష +- 표 +- 噴 +- 堕 +- 圏 +- 咳 +- 淑 +- 庸 +- 疚 +- 嘎 +- ג +- 滝 +- 譲 +- 炫 +- ପ +- 蓬 +- 绵 +- 谎 +- ዳ +- 碍 +- 巷 +- 驰 +- ኔ +- ጋ +- ξ +- 평 +- ű +- ါ +- 詳 +- 圭 +- 顽 +- 蹦 +- 枉 +- 頑 +- 慣 +- 鋭 +- 鲸 +- 栖 +- 姫 +- 嫩 +- 큰 +- 渊 +- 掠 +- ಳ +- ਂ +- 衔 +- 憋 +- 癖 +- 顧 +- 확 +- 얼 +- 宰 +- 厨 +- ડ +- բ +- ဆ +- 颖 +- ੱ +- 碌 +- 駐 +- 覇 +- 禅 +- 肿 +- 乞 +- 們 +- 듣 +- ਪ +- 匿 +- 梗 +- 幹 +- ޖ +- 俘 +- 접 +- 彪 +- 醋 +- 阅 +- 紋 +- ሉ +- ୁ +- 國 +- 袍 +- 癒 +- ° +- 夷 +- 腻 +- Î +- 愣 +- 堅 +- 賢 +- 沾 +- 贬 +- 绸 +- 먼 +- 맛 +- 灿 +- 끝 +- 吟 +- ണ +- 駆 +- 镖 +- 활 +- 妆 +- 硕 +- ჯ +- 硅 +- 몰 +- 纷 +- 彫 +- 渗 +- 陋 +- 賄 +- 陀 +- 셨 +- – +- ჰ +- 奎 +- 杏 +- ڑ +- ମ +- 译 +- ט +- 盾 +- 盔 +- 贞 +- 溺 +- 坪 +- 잠 +- 육 +- 쁘 +- 竭 +- 佢 +- 辽 +- 袜 +- 栈 +- ሎ +- ፈ +- ೊ +- Í +- ፍ +- 詹 +- 怎 +- 仿 +- 婶 +- 循 +- 백 +- 馅 +- 橙 +- 徹 +- 鍛 +- ሱ +- 儒 +- 恕 +- 耸 +- ೂ +- 淳 +- 翌 +- 餌 +- 庇 +- 捞 +- 斐 +- 膏 +- ۵ +- 럽 +- 隅 +- 啪 +- 辐 +- 熙 +- ඔ +- 紛 +- 捏 +- 떨 +- 손 +- 駒 +- 愚 +- 椎 +- 朽 +- ̉ +- 踪 +- ዚ +- 帘 +- 嘟 +- 颊 +- 惧 +- ኮ +- 藩 +- 筑 +- 廉 +- એ +- ሳ +- 葵 +- 慕 +- 泛 +- 窒 +- ધ +- 랬 +- 昧 +- 족 +- 屿 +- 죽 +- 팔 +- ယ +- 粥 +- ዋ +- 餅 +- 討 +- 廣 +- 붙 +- 씨 +- 犹 +- 叮 +- 萩 +- 歹 +- 咽 +- 湧 +- 侣 +- 蝶 +- 捆 +- В +- ڻ +- శ +- 
ખ +- 懲 +- ؛ +- 哩 +- 尹 +- 웃 +- ỷ +- 망 +- 즈 +- 贱 +- 瞅 +- 斎 +- П +- ሪ +- 졸 +- 貯 +- 郵 +- 频 +- 囚 +- 鲨 +- 凳 +- 缸 +- 짝 +- ۀ +- ɓ +- 뭘 +- 홍 +- 옷 +- 쳐 +- 참 +- 痒 +- 혼 +- 眨 +- 揉 +- Ғ +- 테 +- ծ +- 咒 +- 绒 +- 厘 +- 변 +- 십 +- 厢 +- 琐 +- ਼ +- ẳ +- 缴 +- 驼 +- ዲ +- 嵐 +- 礁 +- 粹 +- 독 +- 俳 +- 妞 +- 쉬 +- 毙 +- ዛ +- 岐 +- 闇 +- 肮 +- ـ +- 債 +- 盯 +- 銅 +- 卦 +- 莹 +- ။ +- ቅ +- 樱 +- 笠 +- 薯 +- 醇 +- 栓 +- ़ +- 崛 +- ካ +- 刹 +- 奨 +- 迅 +- њ +- җ +- 릴 +- 투 +- 谭 +- 俯 +- 帳 +- 帐 +- 瑶 +- 떡 +- ោ +- 溝 +- ೀ +- 谍 +- 습 +- 噩 +- ҷ +- ੁ +- 淀 +- 勺 +- 簿 +- 曝 +- ւ +- 饺 +- 棺 +- 斩 +- ። +- 貝 +- 鼎 +- Ё +- 뒤 +- Ŝ +- 별 +- ိ +- 殻 +- 舗 +- 婿 +- 韵 +- ੈ +- 충 +- 酔 +- 繋 +- 剛 +- 愤 +- 韧 +- 웠 +- ಣ +- ฐ +- 哺 +- 哼 +- ඟ +- 嗡 +- 緒 +- 姬 +- 慶 +- 匈 +- 懐 +- ഷ +- 挠 +- 氓 +- இ +- 桩 +- ۍ +- ៅ +- 纲 +- 妾 +- 軌 +- 渴 +- 聘 +- ם +- 衍 +- 랐 +- ŷ +- 奄 +- 酋 +- ူ +- ହ +- 睹 +- 拌 +- ຶ +- 绪 +- 瘫 +- 濱 +- 隧 +- 瞄 +- ൂ +- 耿 +- ൻ +- 吱 +- 喇 +- ഗ +- շ +- 嘻 +- ಷ +- ಚ +- 霧 +- 넣 +- 怠 +- 杖 +- 錦 +- 屠 +- 빼 +- 鞠 +- 眺 +- 藻 +- 栅 +- 矛 +- 冊 +- ထ +- 聆 +- ശ +- 怡 +- 宵 +- 악 +- Ы +- 嚷 +- 패 +- ァ +- 勋 +- 언 +- 慨 +- 赦 +- 萍 +- ռ +- 막 +- አ +- ζ +- 豁 +- 휴 +- 肚 +- 捣 +- ٿ +- 셔 +- 昨 +- 皓 +- 喃 +- എ +- 喉 +- 貿 +- 趴 +- 迭 +- 謀 +- 孩 +- ቤ +- 薦 +- 粛 +- 哒 +- ീ +- 趾 +- 酿 +- 섯 +- 痘 +- 茜 +- 蚀 +- 菇 +- எ +- 躬 +- 誌 +- 佬 +- ¿ +- 俵 +- ċ +- 꺼 +- 渐 +- 卒 +- 鸽 +- 發 +- ళ +- ហ +- 亩 +- 唇 +- 壇 +- ಶ +- 蝇 +- ચ +- 蜀 +- 隻 +- 俱 +- 泌 +- 剿 +- 磕 +- ቸ +- அ +- 衬 +- 처 +- 垮 +- 琉 +- 墅 +- 쯤 +- 畀 +- 険 +- 扁 +- 橱 +- 창 +- 咁 +- 婉 +- 특 +- 沒 +- 倫 +- 噜 +- 钦 +- 傍 +- ӷ +- 嗓 +- 枫 +- 답 +- ਗ +- 茸 +- 兑 +- 攒 +- צ +- 惰 +- 림 +- 숙 +- 榄 +- 氢 +- 喧 +- 览 +- 澜 +- 羅 +- 忏 +- 憩 +- 拱 +- 轿 +- Ú +- 슬 +- 倩 +- ਅ +- 剔 +- 距 +- 禀 +- 揍 +- ޒ +- 嵌 +- 瞬 +- 憲 +- 蹭 +- 凹 +- 판 +- ィ +- ९ +- 凸 +- 橘 +- 苔 +- 蕉 +- 浇 +- ৯ +- 獄 +- 穂 +- „ +- 贸 +- ʼ +- 읽 +- 聪 +- 료 +- 옆 +- 乜 +- 골 +- 對 +- 谨 +- 斧 +- 켜 +- 縄 +- 菱 +- '#' +- 吼 +- 허 +- 嗽 +- 蹄 +- 拇 +- ಜ +- 該 +- 텐 +- 북 +- 髓 +- ਬ +- 浄 +- 荘 +- Ô +- 죠 +- ះ +- 栃 +- 몸 +- 瓣 +- 莓 +- 굴 +- 塾 +- 遵 +- ן +- ۹ +- 甸 +- 娟 +- 蠢 +- 함 +- 獣 +- 缅 +- ೋ +- 틀 +- 각 +- 啫 +- 屯 +- 經 +- Г +- 餃 +- ੂ +- 療 +- 绍 +- 槛 +- ჭ +- ధ +- 겼 +- 曦 +- 涅 +- 涡 +- 鄙 +- 霖 +- 麟 +- ભ +- 冥 +- 谅 
+- 희 +- 饲 +- 潇 +- 飽 +- 骆 +- 哉 +- ఒ +- ઓ +- 萎 +- 膚 +- 斉 +- 皂 +- 屑 +- 悯 +- 衫 +- 鉢 +- 축 +- ሞ +- 며 +- 법 +- 芙 +- 疤 +- 帜 +- 罕 +- 蝠 +- + +- 향 +- 寇 +- 丫 +- 힘 +- 朋 +- 拙 +- 荆 +- ஆ +- 늦 +- 황 +- 撇 +- 택 +- 戳 +- 랜 +- 撰 +- 璃 +- 釜 +- 혀 +- 盏 +- ォ +- 択 +- 沛 +- 臀 +- 莽 +- ሀ +- 酮 +- 록 +- 诵 +- 绊 +- 婪 +- ૂ +- 硫 +- 품 +- 碁 +- 郝 +- 匀 +- 颇 +- 聋 +- 賠 +- 删 +- 阐 +- ណ +- 妊 +- Х +- 辟 +- 丞 +- 牡 +- 석 +- 익 +- 噬 +- 拟 +- 瞪 +- 刈 +- 坝 +- 嵩 +- ۳ +- 욕 +- 详 +- ሩ +- 檬 +- 媚 +- 虏 +- 粪 +- 닐 +- ҽ +- 蜗 +- 惦 +- ൾ +- 鐘 +- 淮 +- զ +- 베 +- 铸 +- 綿 +- 缉 +- 蘑 +- 垄 +- 粤 +- 슷 +- 풀 +- 맨 +- 骸 +- ٬ +- 唬 +- 绞 +- 블 +- 婴 +- ധ +- 蒲 +- 넌 +- ួ +- У +- 综 +- 塊 +- 择 +- խ +- 膳 +- 蒜 +- 蝙 +- ਇ +- 嚼 +- 榴 +- 曽 +- 때 +- 枢 +- 於 +- 偵 +- 涩 +- ആ +- 诡 +- 鳄 +- 矩 +- 溯 +- 贿 +- 검 +- 쓸 +- 칠 +- 厉 +- 責 +- 靓 +- 송 +- 炖 +- 疹 +- 肃 +- 咧 +- ଲ +- 旱 +- ඹ +- 魄 +- 哋 +- ○ +- 션 +- 꼬 +- ञ +- 婢 +- 캐 +- 烘 +- 苛 +- ջ +- ខ +- ਟ +- 肇 +- 栽 +- 熄 +- 馨 +- ળ +- 紗 +- 채 +- 환 +- 窄 +- 禄 +- 탈 +- 권 +- 腥 +- 噌 +- 祐 +- 妓 +- ୟ +- 訓 +- 淫 +- 懦 +- 昊 +- 磯 +- 糕 +- 贡 +- 篷 +- ሺ +- 捍 +- ങ +- 광 +- 铲 +- ۴ +- 墟 +- 粧 +- 娥 +- ৫ +- ဖ +- 祁 +- 忆 +- 啸 +- 〈 +- ෘ +- 懈 +- ಇ +- 拧 +- 凿 +- 톡 +- չ +- 녀 +- ڤ +- 김 +- 汐 +- 糧 +- 谓 +- 瀑 +- ޝ +- 寓 +- ਣ +- 扒 +- 衅 +- ఈ +- ജ +- ۾ +- 剃 +- 腫 +- 勿 +- ញ +- 奕 +- 깨 +- 卜 +- 꼭 +- ඕ +- 砕 +- 澡 +- 嚣 +- 閥 +- Ĝ +- 邱 +- 簡 +- 飙 +- ৩ +- 芭 +- 떠 +- 庶 +- × +- 蛛 +- 麼 +- औ +- 째 +- 철 +- 锯 +- ぃ +- 鉱 +- 嗰 +- 鹤 +- 혹 +- 嘶 +- 舆 +- 穀 +- 冗 +- 诫 +- 恤 +- 箸 +- 鎮 +- ז +- ᱟ +- 巩 +- 彬 +- 嬢 +- 瘟 +- 诀 +- 埔 +- 砰 +- 舔 +- ષ +- 밤 +- 詰 +- 顕 +- ዘ +- 煽 +- 綾 +- 窥 +- 絆 +- 움 +- 宛 +- 撼 +- ಧ +- 咔 +- 與 +- 陳 +- 芹 +- 陡 +- 掷 +- 廓 +- 逻 +- ബ +- 唆 +- 寧 +- 怯 +- 鹦 +- 裹 +- 翰 +- ቶ +- ෞ +- 벽 +- 奮 +- 너 +- 齋 +- 승 +- ዎ +- ኩ +- 뽑 +- ഇ +- 讳 +- ភ +- 拽 +- ଦ +- 못 +- 彤 +- 當 +- 퍼 +- 缪 +- 唾 +- 渦 +- 跤 +- ዝ +- 掐 +- 峭 +- 梭 +- ށ +- 兮 +- 窑 +- 應 +- 屉 +- ঢ +- 伽 +- џ +- 菩 +- ฮ +- 潤 +- 辑 +- ៉ +- 께 +- 땐 +- 鈴 +- 暂 +- 廖 +- ॉ +- ഒ +- 屡 +- 峻 +- 鹉 +- 熏 +- 鞍 +- 呻 +- 雯 +- 곳 +- 搂 +- 蜡 +- ฉ +- ৪ +- 깔 +- 說 +- 헤 +- 缆 +- 涯 +- ធ +- 掀 +- 孽 +- ഹ +- 奠 +- 련 +- 짐 +- 樊 +- 粋 +- 衙 +- ҕ +- 녁 +- 叭 +- ڊ +- 멀 +- 큼 +- 詠 +- 劈 +- 높 +- আ +- 蕨 +- 掃 +- భ +- 驯 +- 篠 +- 뜨 +- 痪 +- 窮 
+- 騎 +- ຟ +- ሮ +- Ř +- 촌 +- 歉 +- ణ +- ฝ +- 잔 +- 沦 +- 绽 +- Τ +- 樣 +- 暢 +- 폰 +- 窍 +- 條 +- 봉 +- 泵 +- ൽ +- 빨 +- 암 +- 纺 +- 寛 +- 悉 +- 潭 +- ୋ +- 焚 +- 畔 +- 嘲 +- 捂 +- 껴 +- 輔 +- 棕 +- 饥 +- ថ +- 籠 +- ဲ +- 佣 +- ଏ +- ڏ +- 泻 +- 柠 +- ഞ +- 류 +- ፣ +- ƙ +- ۽ +- 钞 +- 呜 +- 葫 +- ។ +- 嶽 +- ቢ +- 濁 +- 仨 +- ӣ +- 飓 +- ഭ +- 沐 +- ಆ +- 邀 +- 赁 +- 冕 +- 哗 +- 禽 +- 酶 +- 侃 +- 锈 +- 呪 +- 熔 +- 揚 +- 奢 +- Э +- 趋 +- 嗦 +- ផ +- 衝 +- 窖 +- 阀 +- 扳 +- 摂 +- 응 +- ඊ +- 쉽 +- 筷 +- 妍 +- ओ +- 斬 +- 肋 +- 클 +- 胺 +- 亵 +- 叽 +- 咎 +- 桨 +- ሥ +- 앉 +- 潔 +- ዬ +- 錯 +- 극 +- 宙 +- 禾 +- ৮ +- 궁 +- 넷 +- 丐 +- 睦 +- ଣ +- 끄 +- 딩 +- ફ +- 赋 +- 蘭 +- ۸ +- 논 +- 徊 +- 徘 +- Ш +- 琦 +- 빌 +- 颤 +- 颂 +- 착 +- 氨 +- ։ +- 썼 +- 擅 +- ‐ +- ぉ +- 蓮 +- ৬ +- 쿠 +- Κ +- 釣 +- 捅 +-  +- 拢 +- 鵬 +- 叱 +- ३ +- • +- 嚎 +- 싼 +- ഴ +- 苑 +- 놨 +- 啓 +- 冨 +- 嘢 +- 鷲 +- ଇ +- 킨 +- 邵 +- 狮 +- 燥 +- ޕ +- ಭ +- 犀 +- 馒 +- 癫 +- 沫 +- 늘 +- 瑜 +- љ +- 禧 +- 딸 +- 瘤 +- 咖 +- 抒 +- 棠 +- ५ +- ቱ +- 噂 +- 舵 +- 곤 +- 凄 +- 抠 +- Ņ +- ሌ +- 拷 +- 蓋 +- 寮 +- 斋 +- ४ +- ഉ +- 걱 +- 呦 +- ሬ +- 俭 +- 蚕 +- 揽 +- 컴 +- 舶 +- 맥 +- 焕 +- 倭 +- 睿 +- 瞳 +- 鹊 +- ಎ +- ಥ +- 辻 +- ☎ +- 铛 +- 弔 +- 락 +- 兩 +- 钙 +- 崽 +- Π +- 钝 +- 낫 +- 밀 +- 銘 +- 縛 +- 殊 +- 藍 +- 俞 +- 惩 +- 刁 +- 튜 +- 褪 +- 脾 +- 谤 +- 跋 +- ८ +- 쌤 +- 획 +- Ф +- 呕 +- 雁 +- 躯 +- 迦 +- 恳 +- 啱 +- 梵 +- 關 +- 孵 +- 晏 +- 鴨 +- ਉ +- 棱 +- 疎 +- 恍 +- 匂 +- 咀 +- 濒 +- 渲 +- 绷 +- 냈 +- 淆 +- 胰 +- ሻ +- 實 +- 팀 +- 坟 +- 龚 +- 쁜 +- 谊 +- 溃 +- ћ +- 髦 +- 诽 +- 拎 +- 醤 +- 曇 +- 阎 +- 찾 +- 娩 +- ଥ +- ሲ +- 渺 +- 骼 +- 蘇 +- ६ +- 啃 +- 揣 +- 椰 +- 嗑 +- 弧 +- 懵 +- 찌 +- 笘 +- 辙 +- 渎 +- 苟 +- ۷ +- ቆ +- 빵 +- ଗ +- 汹 +- ۶ +- 檐 +- 喵 +- 騰 +- 窦 +- 歼 +- 葩 +- 범 +- Ș +- 억 +- 隘 +- 襟 +- 逍 +- 攞 +- 櫻 +- ቻ +- 锚 +- 赃 +- 喀 +- 璐 +- 蔼 +- ဘ +- 區 +- 範 +- 绰 +- 沪 +- 碟 +- 沥 +- ଟ +- 딴 +- 쩌 +- ኧ +- ७ +- ਖ +- 胚 +- 篱 +- 幡 +- 嶺 +- 験 +- 掺 +- 蚁 +- ৭ +- ਚ +- 篤 +- 暫 +- 聡 +- 鷹 +- 茉 +- 氟 +- 叩 +- 擁 +- 掰 +- 嗎 +- 乍 +- Ṭ +- 溅 +- 煲 +- Ở +- உ +- 핑 +- 旷 +- 烬 +- 睫 +- 굉 +- 탄 +- 沧 +- 跷 +- 剰 +- 런 +- ๊ +- ઉ +- 岔 +- 枣 +- 渕 +- 扼 +- 咏 +- 佟 +- Ā +- 悚 +- 灶 +- ẵ +- 靳 +- Ο +- 醸 +- 褐 +- 愉 +- 媳 +- 筝 +- 觅 +- 彰 +- 逮 +- 逞 +- 矫 +- 殷 +- 센 +- 盎 +- ঃ +- 巅 +- 聽 +- 澈 +- 亞 +- 姗 +- 赂 +- 鑫 +- 聂 +- 闸 +- Ä 
+- 퇴 +- 螃 +- 冉 +- 簧 +- 疟 +- 갑 +- З +- 倔 +- 绎 +- 翅 +- 搵 +- 僻 +- 眷 +- 藓 +- ଅ +- ኑ +- Ǧ +- ௌ +- ଯ +- 萝 +- İ +- 芥 +- ٻ +- 壕 +- 謡 +- 媽 +- 긍 +- ձ +- 褶 +- 鉛 +- 첫 +- 惶 +- 笹 +- 徙 +- 搾 +- Ҳ +- 걍 +- 膀 +- 揪 +- ໊ +- 逾 +- 왕 +- ഡ +- 렌 +- 씬 +- 悖 +- 痩 +- 裡 +- 揃 +- 挚 +- 娄 +- 眩 +- 氣 +- ਈ +- ጀ +- 膛 +- ଜ +- ੍ +- ፋ +- 甥 +- 驸 +- 荧 +- 雞 +- 灸 +- 焉 +- 琅 +- 煌 +- 唧 +- 戮 +- 뛰 +- 쩔 +- 팅 +- 춰 +- 骏 +- 凛 +- 볶 +- 欄 +- 镶 +- 墜 +- 猶 +- 械 +- 酌 +- Α +- 듯 +- 瑛 +- 滕 +- 챙 +- 镯 +- 蠕 +- 丙 +- 灼 +- 訟 +- 鄂 +- 馈 +- 柬 +- 黏 +- 轶 +- 豫 +- 氯 +- 幫 +- 喚 +- 惚 +- ઈ +- 渠 +- 镀 +- ୀ +- 漱 +- Ý +- ӯ +- 皈 +- 咗 +- 莺 +- ឹ +- 柵 +- 碱 +- ጊ +- 徑 +- 隋 +- 啰 +- ሊ +- 祠 +- 층 +- 诏 +- 텔 +- 찬 +- 箇 +- 钧 +- 뉴 +- 體 +- 痫 +- 麒 +- փ +- ᱩ +- 녔 +- 搓 +- 柚 +- 竿 +- 轄 +- अ +- ಒ +- 榨 +- 립 +- 蛤 +- 惭 +- 炳 +- 飢 +- ဟ +- ቺ +- 帧 +- 鳞 +- 곱 +- 헬 +- ଆ +- 锣 +- 倪 +- 鸥 +- 錠 +- ڳ +- ဝ +- ๆ +- 견 +- 琶 +- 饵 +- 將 +- 怂 +- 蔭 +- 蛾 +- 榆 +- 鎌 +- 揾 +- 탕 +- ዐ +- 倘 +- 秉 +- 賭 +- 菠 +- 瑙 +- 囱 +- 훨 +- 缕 +- ఓ +- 撩 +- 篡 +- Ī +- 缀 +- 짓 +- 踝 +- 辫 +- Σ +- ၏ +- 邢 +- 氮 +- 존 +- 탁 +- 硝 +- 萱 +- 拴 +- 烛 +- ޗ +- 铝 +- 锥 +- 쇼 +- 暦 +- 焊 +- 릭 +- 腎 +- 瘩 +- 淼 +- Μ +- 乒 +- 牲 +- 疮 +- 蓓 +- 酪 +- 霄 +- ޢ +- ኪ +- 搐 +- 츠 +- 绅 +- ៃ +- 빡 +- 粟 +- 遏 +- 颅 +- ጉ +- 링 +- 퓨 +- 磷 +- 耽 +- 祟 +- 瘍 +- 염 +- 謙 +- 霾 +- 臻 +- 猟 +- 诞 +- ៊ +- 蔷 +- 賊 +- 訂 +- 贅 +- Щ +- 晾 +- Õ +- ଶ +- 霆 +- 喪 +- 剖 +- 嗒 +- 樋 +- 疙 +- 靶 +- 纬 +- ਡ +- 값 +- 娴 +- 貞 +- 擒 +- ൊ +- 跚 +- 厩 +- 烯 +- 炽 +- आ +- 駄 +- 侑 +- 袱 +- 픈 +- 玮 +- 朔 +- ኖ +- ዜ +- 滔 +- 겁 +- 맘 +- 漬 +- 俣 +- 羔 +- 囤 +- 烷 +- 嗣 +- 峠 +- ǎ +- 晗 +- 乓 +- 蹒 +- 髄 +- ڙ +- 睾 +- 兢 +- 咐 +- 铮 +- ೈ +- 睐 +- 蹊 +- 깝 +- 翘 +- 螂 +- 椭 +- ઇ +- 鈍 +- ္ +- 參 +- 眯 +- 秩 +- 敛 +- ଡ +- 鋼 +- 榈 +- 號 +- 掂 +- 磐 +- 萬 +- 裴 +- 阮 +- 蔚 +- 雍 +- 悍 +- 젠 +- ޙ +- 駿 +- 拂 +- 腊 +- 寞 +- 穹 +- 悴 +- 憔 +- 琥 +- 琵 +- 밑 +- 恥 +- ဂ +- 垢 +- 茬 +- 坨 +- 遛 +- 涕 +- 掲 +- 咚 +- 覺 +- 嘈 +- 峥 +- 꼈 +- 蝉 +- 麵 +- ٠ +- 땜 +- 祀 +- 譜 +- ൈ +- ᱱ +- 량 +- 酥 +- 橇 +- 靡 +- 槐 +- Ấ +- 錢 +- 槻 +- 暧 +- 侥 +- ଧ +- 척 +- 턴 +- 吠 +- 甫 +- 壌 +- 갖 +- 鳳 +- ኦ +- 놔 +- 쓴 +- 寅 +- 麓 +- Ö +- ഥ +- 驭 +- ቦ +- ጅ +- 낙 +- 鹃 +- Ա +- 屌 +- 壤 +- Å +- 낮 +- ဒ +- 隶 +- 讓 +- 豌 +- 憾 +- 噢 +- 闫 +- ฤ +- ժ +- ٔ +- 岬 +- 萤 
+- ӳ +- ճ +- ଭ +- 匣 +- 효 +- 傑 +- 완 +- 瑾 +- 荫 +- ଷ +- ጨ +- 액 +- 諭 +- છ +- 痊 +- ऊ +- 羹 +- 漩 +- 砾 +- 岚 +- 裳 +- 규 +- థ +- ψ +- 콘 +- 惟 +- 렵 +- 짧 +- 钏 +- 窜 +- 臼 +- 闽 +- Ó +- 內 +- 诬 +- 樂 +- 煞 +- 蝎 +- 弛 +- 從 +- 닭 +- င +- 缔 +- 岌 +- 怼 +- Ò +- 巍 +- 곡 +- 啼 +- 桓 +- 谂 +- 賂 +- 鳍 +- 摧 +- ޤ +- 킹 +- 冀 +- 彗 +- 铐 +- 銚 +- Ṣ +- 깜 +- 烙 +- 蜱 +- 梶 +- 胴 +- 莞 +- Ọ +- Ӱ +- 呱 +- « +- 狸 +- 瑕 +- 痰 +- 纫 +- 臆 +- ዙ +- 紊 +- 肴 +- 迂 +- 隼 +- 稜 +- 꿀 +- 茁 +- 淌 +- ഫ +- 꽤 +- 믄 +- 赣 +- ๋ +- 洽 +- 농 +- 霹 +- 倚 +- 晦 +- 踹 +- ᱠ +- 덕 +- 嗖 +- 濡 +- 猬 +- 낼 +- 덜 +- Ε +- 撬 +- 呛 +- þ +- ጂ +- 珑 +- 瑠 +- 虜 +- 훈 +- 晤 +- 舘 +- 嗜 +- 渓 +- 瓢 +- 眶 +- 쳤 +- 瞰 +- 腌 +- 勲 +- 蕴 +- 韶 +- 뜻 +- 懿 +- 蕊 +- ቁ +- 黃 +- 陨 +- 鳌 +- 匕 +- 籽 +- 냄 +- 帶 +- 稠 +- 餓 +- 裾 +- 梧 +- ̇ +- ኬ +- Ḍ +- Я +- ় +- 닌 +- 荻 +- ਧ +- 烨 +- 瘸 +- 蜿 +- 齊 +- 團 +- 姻 +- ឡ +- 瞻 +- ฎ +- 捎 +- ሜ +- 嚇 +- ଙ +- 殉 +- ቼ +- 厥 +- ਫ +- ጎ +- 酗 +- 哔 +- 刨 +- 侨 +- 痹 +- 섭 +- 웬 +- 骡 +- 汝 +- 蜒 +- 桦 +- 嘣 +- 冶 +- 峙 +- 樽 +- 變 +- 湛 +- 雳 +- 呸 +- 數 +- 檀 +- 辍 +- 笋 +- ៀ +- 崭 +- 꿈 +- 藉 +- ຝ +- 悄 +- օ +- ଳ +- ಉ +- 롱 +- 쌓 +- ॅ +- 엽 +- 총 +- 寥 +- ՝ +- 롤 +- 沌 +- 允 +- 咄 +- 撂 +- ኋ +- 苇 +- 賣 +- 엄 +- 恙 +- 碾 +- 蜷 +- 닥 +- 觀 +- 겹 +- 茵 +- 蹑 +- 吝 +- 璇 +- ໋ +- ័ +- 墩 +- 猥 +- 널 +- 잤 +- 玷 +- 薫 +- 蹬 +- 弈 +- ፊ +- 俐 +- 頬 +- 秤 +- ଛ +- ၊ +- 쎄 +- 獅 +- 橫 +- ך +- 吏 +- 抨 +- 鯉 +- 腱 +- ඛ +- 缰 +- 秽 +- 렇 +- 锄 +- 呉 +- ሙ +- 찰 +- 苯 +- 람 +- 摁 +- 幣 +- 圍 +- 俏 +- 撵 +- Ś +- 혜 +- 炕 +- ଚ +- 猝 +- 쭉 +- 穏 +- 珂 +- ਭ +- 멋 +- 喳 +- 狡 +- 嘀 +- 韬 +- 瞭 +- 惺 +- ಖ +- 끌 +- 쌍 +- 讽 +- 缎 +- 庐 +- 拣 +- 槟 +- 侶 +- 嘆 +- 뻔 +- 單 +- 處 +- 殆 +- 噛 +- 庚 +- 뻐 +- 즐 +- 梓 +- 램 +- 빈 +- 雏 +- 諮 +- 嚏 +- 득 +- 蜥 +- 뿌 +- 鳩 +- 谕 +- 匮 +- 婊 +- 匙 +- 炬 +- 싱 +- 楓 +- 畸 +- 頓 +- ஒ +- 噪 +- 犁 +- 懊 +- 谚 +- 畴 +- < +- ቡ +- 讐 +- 鮭 +- 꿔 +- 隷 +- 梢 +- 鯛 +- 坷 +- 晟 +- 簸 +- 疵 +- 閲 +- 낸 +- 컨 +- 흥 +- 眸 +- 侏 +- 臺 +- '>' +- 짤 +- 胱 +- Γ +- 沁 +- 墳 +- 襄 +- 蛎 +- 빙 +- 矣 +- 湊 +- ಈ +- 믿 +- 잉 +- ෛ +- 蟑 +- 畠 +- 셋 +- 槿 +- 嬛 +- 狈 +- 羚 +- 隈 +- 泞 +- 荐 +- 吭 +- 剁 +- ٘ +- 炙 +- 蟆 +- 牽 +- 翡 +- 튀 +- ଁ +- ᱤ +- 嗷 +- 绮 +- 簇 +- 娱 +- 幌 +- 犊 +- 漓 +- ቴ +- 牟 +- 昕 +- 념 +- 诋 +- 俸 +- 翩 +- 钥 +- 淤 +- 玻 +- 绥 +- 칼 +- 澎 +- ٫ +- 攘 +- 溉 +- 瀚 +- 
庵 +- ઘ +- 塀 +- 茧 +- 哆 +- 롯 +- 挿 +- 殡 +- 荃 +- 釉 +- 窯 +- 伶 +- 爺 +- 瞑 +- ڌ +- 厮 +- 汶 +- 풍 +- 팬 +- 兒 +- 絹 +- 薩 +- 祷 +- 慑 +- 憂 +- ฏ +- 嫉 +- 輩 +- 拯 +- 淇 +- 馋 +- ᱭ +- 啄 +- 猖 +- 絮 +- Δ +- 穗 +- 녹 +- 맡 +- 켓 +- Ē +- 辗 +- ఖ +- 嫣 +- Η +- ዊ +- 轉 +- 惮 +- 傀 +- 儡 +- 啬 +- 颓 +- 渭 +- 筐 +- ଉ +- ዱ +- 샤 +- ሸ +- ឺ +- 鞘 +- 驿 +- 푸 +- 桅 +- 浙 +- 俾 +- 叼 +- ቂ +- 愕 +- 聲 +- 넓 +- 赘 +- 蛊 +- 儲 +- 岖 +- 죄 +- È +- 蛍 +- 朦 +- ዕ +- 貂 +- 뀌 +- 폭 +- 權 +- 渉 +- 跺 +- 酝 +- ጭ +- 焙 +- 渇 +- 筱 +- ඝ +- 撸 +- 洼 +- 脐 +- 曖 +- 옮 +- ኳ +- 翟 +- 쟁 +- 蘸 +- 稔 +- 紳 +- 喱 +- 蚤 +- 雹 +- 鞅 +- 鲤 +- ☆ +- 妬 +- 蹈 +- 捋 +- 矜 +- ђ +- 털 +- 潼 +- ኒ +- ዶ +- 袒 +- 멘 +- 閑 +- 陌 +- অ +- 玺 +- 羁 +- 卤 +- 渝 +- 戎 +- ෆ +- ჟ +- 坍 +- 邑 +- 昵 +- 忒 +- 砌 +- 磋 +- 汲 +- 웨 +- 젤 +- 珈 +- 綻 +- 镰 +- 暁 +- 敖 +- 槌 +- 踱 +- 堺 +- ᱮ +- 摞 +- ቹ +- 钳 +- 倶 +- 诛 +- 寐 +- 邹 +- 缭 +- 솔 +- 撲 +- 醍 +- 樓 +- ः +- 胧 +- 讪 +- 醐 +- ݨ +- ඨ +- 蜴 +- 毗 +- 虞 +- 떼 +- 롭 +- 聯 +- 钠 +- 峯 +- 璋 +- 賓 +- 淵 +- ~ +- 蜚 +- ޚ +- 湃 +- ̍ +- ಫ +- 亢 +- 腑 +- 숨 +- ౌ +- 枷 +- 檜 +- 谬 +- 岑 +- 袄 +- 铀 +- Ì +- 鲑 +- \ +- 杵 +- 韭 +- 콜 +- 烹 +- ඬ +- ኸ +- ਥ +- 킬 +- ඥ +- 熨 +- 擂 +- 赡 +- 滿 +- ሷ +- 摯 +- 诠 +- 匡 +- 谟 +- 瞩 +- 腓 +- 黯 +- ڼ +- 帚 +- 뿐 +- 엑 +- 흐 +- ଂ +- 顷 +- ឆ +- 潦 +- 탔 +- ኘ +- 앙 +- 箔 +- 똑 +- 匾 +- 징 +- ሴ +- ጃ +- 邝 +- 딜 +- ဉ +- 漪 +- 胁 +- 羲 +- 耙 +- 馀 +- 訊 +- 氪 +- 瘠 +- ١ +- 닝 +- 젊 +- 흔 +- 邉 +- 骤 +- 슨 +- 骰 +- 滇 +- 繕 +- 辦 +- ஃ +- 抉 +- 渍 +- 彝 +- 曰 +- 顎 +- 랩 +- 슈 +- 씻 +- 냉 +- 율 +- 縣 +- 询 +- 럴 +- Մ +- 亥 +- 樟 +- 樫 +- 畫 +- 홀 +- 尧 +- 骄 +- 饷 +- 걘 +- 얻 +- 浊 +- 령 +- 씀 +- ぅ +- 鐵 +- 蔗 +- 遼 +- 谛 +- 杭 +- 毋 +- ၀ +- 땡 +- ሠ +- 憤 +- 涟 +- 芜 +- 鸳 +- 콩 +- 椿 +- 怦 +- 鸯 +- ઝ +- ၎ +- ऐ +- 擬 +- 酯 +- 灘 +- 钮 +- 痢 +- 醫 +- 餸 +- 꽃 +- 凱 +- 忡 +- 酎 +- 냥 +- 吮 +- 弩 +- 續 +- 荔 +- 狳 +- 產 +- ጆ +- 犰 +- 骇 +- ଖ +- 噼 +- 紡 +- 줌 +- 짱 +- 趕 +- 峨 +- 컬 +- 粑 +- 攸 +- ŋ +- 嗱 +- ኤ +- 槙 +- Ҫ +- ዓ +- 몬 +- 拗 +- 營 +- 帥 +- 曙 +- ጫ +- 锻 +- 浚 +- 幂 +- 俑 +- 笃 +- 鬱 +- 廿 +- 祺 +- 夠 +- 杞 +- 钾 +- ݙ +- 舷 +- 購 +- 瘀 +- 萃 +- 蜓 +- 즌 +- 筏 +- 涤 +- 曳 +- ฆ +- 랄 +- 蒼 +- 忑 +- ៍ +- 胭 +- ቲ +- ቷ +- 嬷 +- 釧 +- 鳗 +- 忐 +- 鼹 +- 緻 +- 轼 +- 벤 +- 傳 +- 悸 +- 렀 +- 訣 +- 暉 +- 锌 +- 哧 +- 娲 +- 禹 +- 窘 +- 蜻 +- Ł +- 嚓 +- 눠 +- 덴 +- 
똥 +- 춤 +- 唄 +- 諾 +- 楔 +- 薗 +- 錮 +- 楷 +- 遡 +- ڀ +- 폴 +- 鱿 +- 嗝 +- 델 +- ഈ +- 뚫 +- 켰 +- 柑 +- 啡 +- 漕 +- 凪 +- 蹋 +- ഏ +- ─ +- 煩 +- 객 +- 專 +- 篝 +- Ż +- 깊 +- 댓 +- 웹 +- ੜ +- 迢 +- 钰 +- 芊 +- 薰 +- ੌ +- ᱢ +- 孜 +- Ķ +- 渤 +- 绯 +- 碇 +- 諏 +- Ĵ +- 帷 +- 涸 +- 蟋 +- 芮 +- 邋 +- 懇 +- 擎 +- 馍 +- 掖 +- 鯨 +- ዩ +- 燒 +- 钵 +- ૃ +- 瞌 +- ഖ +- 沽 +- 蟀 +- ќ +- 锂 +- 荚 +- 鼬 +- 덟 +- 딘 +- 쌀 +- 蟒 +- 嫦 +- 藝 +- 姊 +- 률 +- ఐ +- 漉 +- 誕 +- 寫 +- 憬 +- 萦 +- 균 +- 낀 +- 압 +- 옥 +- 卉 +- 焰 +- 偎 +- 殃 +- 肛 +- 噔 +- 鹭 +- ٺ +- 壺 +- 蝦 +- 멍 +- 윤 +- 컵 +- 픽 +- Ê +- 榊 +- 總 +- 灏 +- 噺 +- 紺 +- 捨 +- 厕 +- 盪 +- 脯 +- 驹 +- ᱜ +- 崴 +- 榛 +- 邯 +- 鬟 +- Հ +- ऱ +- 섬 +- 왠 +- 竺 +- 勅 +- 栾 +- 譬 +- 琛 +- 蕃 +- 홉 +- 骁 +- 栩 +- 攥 +- 俪 +- 刽 +- 笙 +- 遢 +- 삶 +- 걷 +- 눌 +- Š +- 菁 +- 輕 +- 妳 +- 汾 +- 磺 +- 醺 +- ሶ +- ፕ +- ᱚ +- 댄 +- 셀 +- 템 +- 憨 +- 錬 +- 喙 +- 氦 +- 땅 +- 릉 +- 肽 +- 颯 +- 黔 +- 锵 +- 릿 +- 밍 +- 绛 +- ヂ +- 嫖 +- 炜 +- 瘪 +- 臊 +- 벗 +- 봄 +- 쟤 +- 폐 +- 蒿 +- 엘 +- 詣 +- 羨 +- 캠 +- 孰 +- 铬 +- 恺 +- 恢 +- 佰 +- 蚱 +- 渥 +- 纂 +- 纶 +- 벅 +- 叡 +- 捶 +- 綺 +- 眾 +- 憧 +- ऑ +- ڄ +- 昴 +- 誹 +- 謗 +- 棣 +- 汕 +- 沏 +- ᱫ +- 빛 +- 馁 +- 鵜 +- 涮 +- ঈ +- ଼ +- 앤 +- 玟 +- 芷 +- 妒 +- 柊 +- 琊 +- 竣 +- 斟 +- 騙 +- Έ +- ዮ +- 瀧 +- 艮 +- ဗ +- 糙 +- 闵 +- ጓ +- 腳 +- 蜕 +- 韻 +- 뮤 +- ඵ +- 낭 +- 룸 +- 蝗 +- 蹂 +- ਏ +- 腮 +- 럭 +- 쁠 +- ඡ +- 诧 +- 镁 +- 鄉 +- 鍾 +- 窿 +- 蚪 +- 蝌 +- 疡 +- ዴ +- 砥 +- 뷰 +- 엉 +- 皙 +- 撅 +- 犟 +- 娯 +- 掴 +- 둥 +- 헐 +- 혈 +- 砺 +- ਯ +- 麹 +- ฬ +- 籔 +- 럼 +- 捻 +- 벨 +- 홈 +- 嫡 +- 睛 +- ぢ +- 遷 +- 伎 +- ᱾ +- 哽 +- ឈ +- 堰 +- 磡 +- 焱 +- 翎 +- 矗 +- 翊 +- Ν +- ೃ +- ၁ +- 腋 +- 舊 +- 豬 +- 꼐 +- 썰 +- 펜 +- 坞 +- ஏ +- 糯 +- ޞ +- 倣 +- 凋 +- 엠 +- 헌 +- 흘 +- 諜 +- ヅ +- 頸 +- ሂ +- ፎ +- 꼼 +- 둔 +- 삭 +- ♫ +- 貓 +- 踵 +- 惋 +- 價 +- 歡 +- 昱 +- 浣 +- 讹 +- 喆 +- 擔 +- ף +- 龛 +- 艶 +- 苣 +- 涓 +- 躏 +- 窪 +- 屹 +- 恬 +- 裘 +- 糗 +- 绚 +- 錄 +- 霓 +- 噱 +- 껄 +- 槍 +- 蹩 +- '~' +- 孢 +- 춘 +- 힐 +- 햄 +- 夯 +- 潟 +- 漾 +- 偃 +- 咣 +- 癞 +- 訃 +- 딨 +- Ն +- 찜 +- 촬 +- 춥 +- 罠 +- 麸 +- 獭 +- 镳 +- 铠 +- ጡ +- 吒 +- 삐 +- 잃 +- 찐 +- 틱 +- 핀 +- 皐 +- 闺 +- 榎 +- 娼 +- ୂ +- ឋ +- 阂 +- 몽 +- Ļ +- Ս +- ృ +- 詫 +- 铎 +- 僅 +- 菓 +- ៏ +- 卯 +- 噎 +- 榕 +- 馄 +- 樵 +- 랙 +- ᱛ +- 괴 +- 낳 +- 힌 +- 疱 +- 舜 +- 祛 +- 箍 +- 劉 +- 枸 +- 盹 +- 觑 +- ൃ 
+- 戯 +- 驗 +- 례 +- 핫 +- Җ +- Қ +- 醛 +- 嘩 +- 慵 +- 섞 +- 큐 +- 팩 +- ઠ +- ឌ +- 圖 +- 藕 +- 蚓 +- 呑 +- 喩 +- 곧 +- 닫 +- 밝 +- 앨 +- 毡 +- 跛 +- 秧 +- 嗮 +- 褛 +- 讷 +- 툰 +- '@' +- 虱 +- 蟾 +- 壱 +- 晖 +- 碜 +- 嫔 +- Ă +- Ģ +- ٢ +- 鋳 +- 협 +- 颚 +- 踮 +- 酰 +- 鳟 +- 禍 +- 阜 +- 挛 +- 箕 +- ᱧ +- 腩 +- 꽂 +- 쏘 +- 탑 +- 碘 +- 잖 +- 彷 +- 鸠 +- 缇 +- 绢 +- ฑ +- 猕 +- 밴 +- 퀴 +- 戟 +- 巳 +- 隕 +- 啤 +- 擢 +- 睽 +- 辘 +- 痉 +- ፖ +- 熠 +- 鞑 +- 饨 +- ሐ +- 褴 +- 阈 +- 묻 +- 烏 +- 酚 +- 惣 +- ຣ +- 阑 +- 犒 +- 碉 +- 龈 +- 靼 +- ਘ +- 蛰 +- 阙 +- 놈 +- 땠 +- 鬃 +- 또 +- 哝 +- Ồ +- 낄 +- 싹 +- 첨 +- 蔻 +- 鑼 +- 毀 +- 痣 +- 釘 +- 偕 +- 湍 +- 燎 +- 钗 +- 咫 +- 덩 +- 箋 +- 罹 +- 孚 +- 宓 +- ඤ +- ቋ +- 燈 +- 왼 +- 꿨 +- 흰 +- ၍ +- 庾 +- 盡 +- 躇 +- 鎧 +- 幢 +- 浒 +- 胤 +- 荤 +- 鼾 +- 孀 +- 蹿 +- 裝 +- 獾 +- 皖 +- 蛀 +- 蚯 +- 颐 +- 좌 +- 棲 +- ^ +- 獗 +- 睑 +- 苞 +- 鳕 +- 腦 +- 뒷 +- Ј +- 楞 +- ଠ +- ኞ +- ዞ +- 납 +- 떴 +- 쩍 +- 斷 +- 憶 +- 泸 +- 勸 +- 铰 +- 洱 +- 緯 +- 겸 +- ૌ +- 粽 +- 咤 +- 宕 +- 쿨 +- ֆ +- ڃ +- ‟ +- 鞦 +- 빤 +- 뼈 +- 슴 +- 엇 +- 칸 +- 莘 +- 挝 +- 韆 +- 夭 +- 腆 +- Β +- 뚱 +- 칭 +- 閃 +- 蜍 +- 謹 +- 迄 +- 坯 +- 傣 +- 谏 +- 祯 +- 羯 +- 鹑 +- ሃ +- Բ +- ഓ +- 寶 +- 戲 +- 轧 +- 拮 +- 椛 +- 佃 +- 蠣 +- 礎 +- 囧 +- 丕 +- 摒 +- 榻 +- ヱ +- 扈 +- 薙 +- 렁 +- ‧ +- 啷 +- 舉 +- 侬 +- 邃 +- 垛 +- 怔 +- 闰 +- ፓ +- 혔 +- ⁄ +- 氰 +- 끗 +- 덮 +- 證 +- 虻 +- 俚 +- 壑 +- 瞿 +- 藥 +- 衩 +- ѳ +- 喎 +- 醜 +- 갠 +- 뒀 +- 멜 +- 컸 +- 핸 +- 뚝 +- 晌 +- අ +- ೌ +- 扪 +- '|' +- 荨 +- 깎 +- 짬 +- 雙 +- 髅 +- 圃 +- 蓟 +- 鹫 +- 榮 +- 绫 +- 藐 +- 贻 +- ኢ +- 亂 +- 禮 +- 賜 +- 샵 +- 측 +- ฒ +- 胥 +- 骷 +- 앴 +- 褂 +- 較 +- 恃 +- 垦 +- 麝 +- 诲 +- 뜬 +- 갚 +- 턱 +- ፅ +- 媲 +- 馏 +- 駕 +- 톤 +- 닮 +- 壽 +- 剽 +- 篓 +- 馊 +- 鹂 +- 圓 +- 壹 +- 잊 +- Ե +- ٩ +- 亘 +- 缽 +- 翱 +- 貪 +- 얀 +- 펴 +- ঔ +- 罂 +- 鳖 +- 黝 +- 汞 +- 痺 +- 佯 +- 稷 +- 恪 +- 彌 +- 砦 +- ጪ +- 爭 +- 찔 +- 痞 +- 喰 +- 狒 +- 戬 +- 簪 +- 憑 +- ‒ +- 嗌 +- 嗤 +- 囉 +- 諒 +- 뚜 +- 팡 +- 罵 +- 楊 +- 噶 +- 凜 +- ≪ +- 〜 +- 匐 +- 芈 +- 鹌 +- 诃 +- 鳃 +- 】 +- 涎 +- 腼 +- Є +- ጤ +- 泷 +- 漸 +- 蚌 +- 끈 +- 땀 +- 띠 +- 爾 +- 脓 +- 髋 +- ૈ +- 廳 +- 虔 +- 缜 +- 楢 +- 褥 +- 暄 +- 禺 +- 怅 +- ፒ +- 阉 +- 끓 +- 렉 +- 벼 +- 뽀 +- 틴 +- 팠 +- Ӹ +- 褚 +- 啜 +- 雖 +- ୱ +- 【 +- 槲 +- ଫ +- 雉 +- 馥 +- 烊 +- ୃ +- 摹 +- 羟 +- 騨 +- 琢 +- 钊 +- ൺ +- ٥ +- ඈ +- 꼴 +- 댔 +- 豐 +- 廻 +- 沂 
+- 啾 +- 埼 +- 徨 +- 剐 +- 唏 +- 轲 +- 牒 +- ဏ +- 븐 +- 샘 +- 據 +- 峪 +- 錫 +- 밖 +- 漳 +- 炅 +- 荼 +- ၂ +- 玖 +- 絢 +- 镍 +- 沓 +- 뺐 +- 칙 +- 桝 +- 啕 +- 紐 +- 讀 +- 凰 +- 麥 +- 吆 +- 淞 +- 瓮 +- 耦 +- 佘 +- 喋 +- 孺 +- 螨 +- ៈ +- 苫 +- 闩 +- Θ +- 醬 +- 뷔 +- 篆 +- 鉾 +- 蔬 +- 桔 +- 锢 +- 讣 +- 辕 +- 鸵 +- 꽁 +- Ԑ +- 赈 +- 遁 +- 隨 +- 掸 +- 暨 +- 陇 +- 宦 +- 戛 +- 睬 +- ፡ +- 嘚 +- 歲 +- 汛 +- 렛 +- 씹 +- 잇 +- 嚴 +- 檢 +- 흑 +- 麋 +- 佼 +- 랫 +- 鮎 +- 脖 +- 歷 +- 勞 +- 楂 +- 蚂 +- Կ +- 兀 +- ሏ +- 舛 +- 녕 +- 뇌 +- 릇 +- 짠 +- 힙 +- 冴 +- 蕎 +- 崗 +- 膿 +- 侮 +- 茛 +- 攪 +- 逅 +- 匍 +- 邂 +- 瘁 +- Ի +- ڇ +- ឃ +- △ +- ঋ +- ਝ +- 曬 +- 멈 +- 묵 +- 컷 +- 혁 +- 휘 +- 깃 +- 椋 +- ヵ +- 潺 +- 螳 +- 鳅 +- ಘ +- ဇ +- 迸 +- 拄 +- 浃 +- 钛 +- 낚 +- 얇 +- 헷 +- 흡 +- ਛ +- 搀 +- 렴 +- 黜 +- 闆 +- 惬 +- 帛 +- 戰 +- ৷ +- ଘ +- € +- 蜢 +- 鲈 +- 遐 +- 뽕 +- 캡 +- 砧 +- 碼 +- 邨 +- ឱ +- 溥 +- 咿 +- 锹 +- 砚 +- 熹 +- Ả +- ‚ +- 珺 +- 紬 +- Ẹ +- 凧 +- 戊 +- 곰 +- 畢 +- 鱗 +- 苹 +- 笞 +- 箫 +- Դ +- 瀕 +- Ū +- ፉ +- 谩 +- 겉 +- 쭈 +- 탐 +- Ό +- 廟 +- 怆 +- 絕 +- 꽉 +- 鬣 +- 塵 +- 羡 +- Λ +- 댕 +- 뜯 +- ጽ +- 稱 +- 覗 +- 꾼 +- 劃 +- 卻 +- 栞 +- 雛 +- 跆 +- 抿 +- 잘 +- 啧 +- 俨 +- 汴 +- 賽 +- 叟 +- Ҙ +- ଓ +- 渾 +- 糾 +- 긁 +- "\x93" +- Ġ +- ǹ +- ٰ +- ឿ +- 泯 +- 泾 +- 묘 +- 쪘 +- 쫓 +- 婧 +- 籁 +- 淄 +- 痨 +- 弑 +- 忱 +- 淨 +- 縞 +- 酣 +- 繼 +- Ι +- ѓ +- ଞ +- 壞 +- 겪 +- 烽 +- 묶 +- 썹 +- 풋 +- 宍 +- 苷 +- 靜 +- 羌 +- 矯 +- Χ +- 啮 +- 胯 +- 賺 +- 吖 +- 냅 +- 붕 +- 쉴 +- ૉ +- 獨 +- 덤 +- 詮 +- 臃 +- 焖 +- 獒 +- 紹 +- 諗 +- 岱 +- 璨 +- 讃 +- 둬 +- 璧 +- 痔 +- 冽 +- 舀 +- 弼 +- 徇 +- 綠 +- 繭 +- 镣 +- 驮 +- ऋ +- 搡 +- 搪 +- 砷 +- 닷 +- 닿 +- 땄 +- 욱 +- 웅 +- 찝 +- ဦ +- 噗 +- 醚 +- 谑 +- 紘 +- 炷 +- 枭 +- 罔 +- 蚣 +- 颌 +- 璞 +- 닦 +- ĺ +- ሕ +- 馴 +- 긋 +- 랍 +- 咨 +- 粱 +- ໆ +- 蛭 +- 骋 +- 囡 +- ץ +- 劵 +- 濟 +- 纣 +- 荟 +- 蛟 +- 뀐 +- 밟 +- Պ +- 膠 +- ៗ +- ൗ +- 狛 +- 琰 +- 畝 +- 觐 +- 擀 +- 蒔 +- 蚜 +- 귄 +- 빅 +- 쫄 +- 촉 +- 螈 +- 蚝 +- 郸 +- 飴 +- 鸞 +- 娓 +- 皎 +- 眈 +- 捺 +- 砒 +- 糞 +- 茱 +- 裆 +- 갤 +- 뺏 +- 쭐 +- 쿄 +- 팟 +- 赝 +- 犸 +- 蜊 +- 惆 +- 瘴 +- 笆 +- 讥 +- 钨 +- 驷 +- 宸 +- 戾 +- 賑 +- ڍ +- ሯ +- ጦ +- 랭 +- 룩 +- 뺄 +- 샐 +- 숫 +- 팝 +- Ո +- 桟 +- 罄 +- 槃 +- 捗 +- 轱 +- 濕 +- 谙 +- ሦ +- 榔 +- 溫 +- 璀 +- 诣 +- 늙 +- 썩 +- 쫙 +- 톱 +- 爛 +- 铂 +- 奧 +- 鹈 +- 赳 +- 膵 +- 渚 +- 缈 +- 耘 +- 唰 +- 綴 +- 豺 +- 龊 +- 龌 
+- ٣ +- ඌ +- 犷 +- 葆 +- 颞 +- 馳 +- 릎 +- 숭 +- 쌩 +- 썸 +- 祇 +- 險 +- 蝓 +- 鴻 +- 熾 +- 蛞 +- 沱 +- 潍 +- 堇 +- ਐ +- 尬 +- 粕 +- 辄 +- 껍 +- 넨 +- 룹 +- 샌 +- 쌌 +- 좁 +- 핵 +- Գ +- 榉 +- 臥 +- 鹕 +- 蟻 +- 咝 +- 缥 +- ∞ +- 刍 +- 惘 +- 牦 +- 絲 +- ጁ +- 屬 +- 讧 +- 돋 +- 맙 +- ౦ +- ᱞ +- 啟 +- 뺀 +- 埗 +- 悶 +- 毂 +- 贋 +- 顆 +- 鲟 +- 孬 +- 昙 +- 薮 +- 壓 +- 狀 +- 缤 +- 藜 +- 鹽 +- 麩 +- 갓 +- 멤 +- 믹 +- 뱅 +- 붓 +- 윗 +- 쩐 +- 췄 +- 斓 +- 莴 +- 펙 +- ヲ +- 釋 +- 袅 +- 躊 +- 阚 +- 囔 +- 铆 +- Ț +- ሑ +- ጌ +- 兎 +- 戍 +- 涝 +- 诟 +- 铿 +- 쨌 +- 쩨 +- Ĥ +- Ӧ +- ፀ +- 冑 +- 廠 +- 앱 +- 秆 +- 糠 +- 鮫 +- 桧 +- 垩 +- 耷 +- 镐 +- 엊 +- 夙 +- 宥 +- 濠 +- 繍 +- ઢ +- 佗 +- 戶 +- 皑 +- 蝾 +- 쫍 +- 莆 +- 饉 +- 懷 +- එ +- 啁 +- 赊 +- 鸪 +- 挞 +- 鹧 +- ඓ +- 蔫 +- 뜩 +- 띄 +- 략 +- 뤄 +- 맵 +- 폼 +- 甬 +- 烩 +- 碴 +- 钍 +- 掇 +- 閒 +- 奘 +- 羿 +- 趙 +- Ō +- Ά +- ഘ +- ၉ +- ቪ +- 弋 +- 斡 +- 衲 +- 辿 +- 쥐 +- 쪄 +- 珞 +- 瓯 +- 壬 +- 鬧 +- 忖 +- ¥ +- ဓ +- 肓 +- 薅 +- 靈 +- 靛 +- ఘ +- ಏ +- 轭 +- 윙 +- ÿ +- 亟 +- 呲 +- 咻 +- 硒 +- 絨 +- 魯 +- 껌 +- 덥 +- 듬 +- 빽 +- 飒 +- 迥 +- Վ +- 彙 +- 즘 +- ϊ +- 蜈 +- 嗲 +- 觎 +- 轟 +- 귤 +- 낯 +- 쌈 +- 찢 +- 쾌 +- 팍 +- ဥ +- 揄 +- 煦 +- 熵 +- 淺 +- 玫 +- 哐 +- 藪 +- Ñ +- Φ +- Ԥ +- ٤ +- 埠 +- 拈 +- 炯 +- 굽 +- 뻤 +- 뿔 +- 셜 +- Į +- Թ +- 珏 +- 疽 +- 缨 +- 揶 +- 囃 +- 梱 +- 餡 +- 鰹 +- 燻 +- Ħ +- 撥 +- 爻 +- 脫 +- 雫 +- 霁 +- ኙ +- 擇 +- 踌 +- 鬓 +- 겐 +- 쩡 +- 텀 +- 텍 +- ਓ +- 뭉 +- 햇 +- 鯖 +- 髭 +- 傭 +- 蛐 +- 鰻 +- 遜 +- ഠ +- › +- 怄 +- 裟 +- 啩 +- 恣 +- 斛 +- 檻 +- 谔 +- 踞 +- 닉 +- 맹 +- 잼 +- 詐 +- 櫃 +- 浏 +- 綬 +- 攫 +- 冢 +- 綜 +- 芃 +- 歆 +- 殇 +- 鄭 +- 鲱 +- 黨 +- ፃ +- ᱨ +- Ụ +- ‹ +- 滓 +- 濤 +- 갱 +- 앗 +- 쵸 +- 캔 +- 抡 +- 輿 +- 擊 +- 苓 +- 歎 +- 幔 +- 愫 +- 萼 +- 檸 +- 嵇 +- 薏 +- 蘿 +- Ӓ +- ፌ +- 딪 +- Ք +- ሔ +- 憐 +- 洸 +- 燧 +- 珩 +- 껏 +- 셉 +- 즉 +- 펌 +- 珐 +- 瘙 +- 粵 +- 膽 +- 荏 +- 镊 +- 劾 +- 妲 +- 缮 +- 靚 +- 鳏 +- Ţ +- ఠ +- ቄ +- 昀 +- 椀 +- 袤 +- 遽 +- 흠 +- Џ +- 仃 +- 婵 +- 诩 +- 鍼 +- 쉐 +- 탱 +- 奚 +- 饯 +- 桢 +- 樺 +- 锭 +- 蚬 +- 跻 +- ޠ +- 膺 +- ԑ +- ޏ +- ဤ +- 哂 +- 攰 +- 葦 +- 錐 +- "\x94" +- ỵ +- 靭 +- 뺑 +- 엥 +- 탠 +- 帼 +- 〇 +- 姪 +- 晰 +- 摺 +- ឬ +- 恻 +- 惡 +- 蹉 +- à +- ၅ +- 궈 +- 멸 +- 빴 +- 쇠 +- 푼 +- Տ +- 巌 +- 掳 +- 撚 +- 膈 +- 蛹 +- 胛 +- 舫 +- 缚 +- 舐 +- 鲫 +- 荞 +- 豉 +- 叻 +- 骛 +- 龋 +- 糜 +- 둑 +- 뢰 +- 륵 +- 얹 +- 煉 +- 痤 +- 蝽 
+- 煨 +- 膦 +- 嚿 +- 蛆 +- 蟲 +- 钴 +- 顯 +- '}' +- ᱴ +- 歸 +- 湄 +- Ő +- Լ +- Ռ +- ٦ +- 銷 +- 깥 +- 뱃 +- 嶙 +- 鄱 +- 錣 +- 烃 +- 謁 +- 炀 +- 潢 +- 煜 +- 娑 +- 痿 +- ၆ +- ቨ +- 跎 +- 꼽 +- 댁 +- 밭 +- 섹 +- 숏 +- 쎈 +- 쥬 +- 츄 +- 칵 +- 콤 +- ૅ +- 瑰 +- 襷 +- 楕 +- 瞠 +- 酉 +- 鹬 +- 叵 +- 婕 +- 庖 +- 觊 +- 謂 +- 闊 +- ٨ +- 偌 +- 挎 +- 锏 +- 룰 +- 뭣 +- 봇 +- 빔 +- 융 +- 찼 +- 퉁 +- 헛 +- 汎 +- 蟠 +- ゞ +- 箏 +- 峋 +- 堑 +- 痍 +- 纥 +- 勵 +- 粼 +- 钚 +- 퀄 +- ᱥ +- 窩 +- 螯 +- 髻 +- 鱲 +- ′ +- 巽 +- 谄 +- 꼰 +- 뇨 +- 뜰 +- 볍 +- 빚 +- 툭 +- 펑 +- ਊ +- 涣 +- 淬 +- 稅 +- 魇 +- 姣 +- 疣 +- 胫 +- 酊 +- 檗 +- 猾 +- 跄 +- 踉 +- 瓒 +- 骊 +- 鬆 +- ґ +- 咦 +- 肱 +- 臉 +- 鞄 +- 넉 +- 빗 +- 뻥 +- 얄 +- 엎 +- 칩 +- 훔 +- Ə +- ઊ +- 泱 +- 狞 +- 윈 +- 欽 +- 涧 +- 匝 +- 籌 +- 鲶 +- 嘗 +- 鰂 +- 鹩 +- 濮 +- 姦 +- 恿 +- 袈 +- £ +- 撐 +- 曉 +- 聰 +- 蔥 +- 郴 +- ʿ +- ໌ +- ၈ +- ጄ +- ឥ +- 戌 +- 蕙 +- 蠻 +- 贮 +- 铵 +- 깄 +- 앵 +- 혐 +- 檎 +- 緋 +- 桉 +- 骞 +- 坳 +- 箴 +- 桀 +- 鏑 +- 瀛 +- 礴 +- 芪 +- ঊ +- 倖 +- 谵 +- '{' +- Þ +- ၄ +- 喹 +- 燔 +- 芍 +- 詢 +- 遙 +- 깡 +- 뀔 +- 넥 +- 젝 +- 핏 +- 횟 +- Օ +- ઞ +- 洩 +- 颧 +- 燭 +- 뭐 +- ঞ +- 繰 +- 呷 +- 鲻 +- ங +- 鳝 +- 鹪 +- 偻 +- 珥 +- 铡 +- ಞ +- 戀 +- 狰 +- 璜 +- 纭 +- 蝈 +- ሟ +- ‽ +- 敝 +- 砝 +- 삿 +- 샷 +- 쏠 +- 쿼 +- 揸 +- 锰 +- 掟 +- 葭 +- 鸚 +- 謳 +- 罷 +- 湮 +- 蜃 +- Œ +- ಠ +- 诙 +- ਠ +- ಐ +- 厭 +- 咛 +- 掻 +- 揿 +- 纏 +- 荀 +- 菏 +- 蓦 +- 袂 +- 镫 +- 뱀 +- 뱉 +- 짰 +- 嘔 +- 歐 +- 礒 +- 葡 +- 鋸 +- 匯 +- 滉 +- 倹 +- ៌ +- 擺 +- 痱 +- 篑 +- 遲 +- 鹳 +- 嬴 +- 惴 +- 鰺 +- ၃ +- 彈 +- 滟 +- 簾 +- 苋 +- ٧ +- ਢ +- ၌ +- ★ +- ゑ +- 窈 +- 葺 +- 谪 +- 랗 +- 맣 +- 틈 +- 耆 +- 苅 +- 铉 +- 鹋 +- 妩 +- 塹 +- 牆 +- 獐 +- Չ +- ଵ +- 怵 +- 獻 +- 硌 +- 磴 +- ୌ +- 绌 +- 굿 +- 껀 +- 띵 +- 뻘 +- 슐 +- 쩜 +- 툴 +- 椽 +- 濂 +- 焗 +- 裱 +- 챔 +- 穩 +- 茗 +- 橿 +- 镑 +- 脷 +- 錆 +- 寰 +- 阡 +- 忻 +- 矾 +- 镭 +- 骜 +- 駛 +- 詔 +- 냬 +- 뉘 +- 듀 +- 횡 +- ȋ +- ቧ +- ᱷ +- 厲 +- 屐 +- 徵 +- 谗 +- 貰 +- 렬 +- 쿵 +- 唸 +- 嚥 +- 塙 +- 澪 +- 櫓 +- 燦 +- అ +- І +- 噚 +- 榷 +- 孪 +- 揀 +- 擞 +- 纜 +- Ø +- Ρ +- ⠀ +- 伢 +- 缄 +- 翳 +- 蛳 +- 諫 +- 谥 +- 賦 +- 雜 +- 鮑 +- Ζ +- Ї +- Ֆ +- ٪ +- 丟 +- 獎 +- 秸 +- 郦 +- 隱 +- 깼 +- 므 +- 팽 +- 푹 +- 仄 +- 廈 +- 杳 +- 涞 +- 淅 +- 袴 +- 繳 +- 撫 +- 嘤 +- 笺 +- 髮 +- 穣 +- 纾 +- 鲭 +- ኃ +- 垠 +- 牠 +- 睨 +- ၇ +- ጻ +- 佞 +- 櫛 +- 虛 +- 鲷 +- 갇 +- 똘 +- 쇄 +- 쑥 +- 젖 
+- 탭 +- 훌 +- 淚 +- 贏 +- 镂 +- 阖 +- 夥 +- 滲 +- 簽 +- 菰 +- 鹹 +- 绗 +- 牝 +- 霏 +- 逵 +- 鹜 +- 鸸 +- 噏 +- 忤 +- 瞓 +- 啖 +- Ω +- ՛ +- ޟ +- ሹ +- ᱵ +- 殼 +- 礫 +- 簌 +- 蛔 +- 蛯 +- 铤 +- 곽 +- 귈 +- 됩 +- 벳 +- 숱 +- 숲 +- 첩 +- 탓 +- 틸 +- 펼 +- Ҷ +- ጸ +- 鋪 +- 쁨 +- 饽 +- 忿 +- 濾 +- 畿 +- 澁 +- 穢 +- 矶 +- 绉 +- 嚢 +- 犄 +- 筵 +- 茴 +- 莅 +- Ջ +- ଢ +- 嗔 +- 诓 +- 隍 +- 셰 +- ŕ +- ఛ +- 悌 +- 槇 +- 蘋 +- 鵝 +- 녜 +- 맻 +- 벙 +- 섰 +- 썬 +- 쏟 +- 퀘 +- 흉 +- Ď +- Ь +- 寵 +- 廢 +- 祓 +- 麽 +- 蹼 +- 鋒 +- 槭 +- 懋 +- 窕 +- ̂ +- 嵜 +- 掣 +- 灞 +- 襯 +- 邈 +- 锱 +- 魷 +- Շ +- ޘ +- ቫ +- 峽 +- 碓 +- 謠 +- 遴 +- 颦 +- 멕 +- 킥 +- ዥ +- ឧ +- 叢 +- 鍮 +- 黐 +- 갸 +- 뎅 +- 옵 +- 훠 +- 覽 +- 暹 +- 甾 +- 疸 +- 鴎 +- 缛 +- 烁 +- 猁 +- 猞 +- ឯ +- 佝 +- 欖 +- 瀝 +- 罡 +- 萸 +- 藿 +- ̄ +- 咂 +- 螢 +- 铢 +- 頚 +- 饗 +- 鸢 +- ઑ +- Ừ +- 嗬 +- 嘥 +- 嚕 +- 爲 +- 纨 +- 겟 +- 굔 +- 냠 +- 콕 +- 텝 +- 훅 +- Ң +- 悻 +- 潛 +- 苺 +- 藁 +- 鶯 +- 黍 +- 넛 +- 濫 +- 鄰 +- 蠅 +- 襖 +- 枇 +- 盧 +- 廚 +- 褓 +- 賤 +- 幄 +- 栀 +- 盃 +- 鑊 +- 珅 +- 绾 +- 蔦 +- 遨 +- Ҩ +- ۂ +- 嬲 +- 繫 +- 膊 +- 룬 +- 맺 +- 옴 +- 쭤 +- 팁 +- 폈 +- 瑪 +- 癸 +- 삽 +- 첼 +- 锑 +- 龜 +- 嵯 +- 氚 +- 蓼 +- ຼ +- 珪 +- 揖 +- 瑚 +- 膻 +- 霎 +- 飕 +- Ή +- Խ +- 峦 +- 廁 +- 蘆 +- 衢 +- 蹟 +- 锉 +- ሼ +- 啵 +- 恆 +- 焯 +- 诿 +- 깅 +- 뛸 +- 륨 +- 밸 +- 쉰 +- 텅 +- 壯 +- 夾 +- 峒 +- 蝋 +- 餘 +- 勁 +- 哏 +- 檄 +- 赭 +- 毓 +- 燗 +- 鮨 +- 榭 +- 啶 +- 梆 +- 嵘 +- 轢 +- 嗟 +- ጮ +- 嬰 +- 捱 +- 蹶 +- 꺾 +- 텨 +- Ӑ +- ኗ +- ዤ +- ዷ +- ፤ +- 冚 +- 搖 +- 楣 +- 浔 +- 瞟 +- 诨 +- 骥 +- 닙 +- 젓 +- 펐 +- 繞 +- 鹸 +- 瀞 +- 燮 +- 苜 +- 湎 +- 靱 +- 閻 +- 杷 +- 臧 +- 噻 +- 囫 +- 溴 +- 阄 +- Ҵ +- 簑 +- 薔 +- 蠔 +- 頁 +- ፐ +- 〔 +- 檔 +- 閱 +- 겄 +- 괄 +- 굶 +- 귓 +- 깠 +- 꽈 +- 넜 +- 럿 +- 옹 +- 욜 +- 쬐 +- 漑 +- 潸 +- 唷 +- 笈 +- 鱈 +- 蓿 +- 剌 +- 酩 +- 佇 +- 唑 +- 嶼 +- 钒 +- 胝 +- 胼 +- 蛱 +- 圩 +- 礙 +- 趨 +- Υ +- 櫂 +- 玥 +- 瑄 +- 绺 +- 蔔 +- 鸬 +- 鹚 +- Æ +- ୈ +- 墾 +- 搶 +- 盅 +- 绡 +- 蚩 +- 閪 +- "\x9E" +- ­ +- ̈ +- ۖ +- ଝ +- ሾ +- ዪ +- 琏 +- 굵 +- 눅 +- 늬 +- 됨 +- 붉 +- 샴 +- 씌 +- 얜 +- 옳 +- 욘 +- 傥 +- 蜣 +- 迴 +- 鱧 +- 唢 +- 殒 +- 菫 +- 沣 +- 爐 +- 泗 +- 揆 +- 靉 +- 倏 +- 疥 +- 卞 +- 噉 +- 囵 +- 殘 +- 氙 +- 腚 +- 銮 +- ᱣ +- 慘 +- 搔 +- 犠 +- 盥 +- 綫 +- 蒐 +- 褲 +- 訝 +- 辊 +- © +- ® +- ቭ +- 〕 +- 吡 +- 啉 +- 痂 +- 觞 +- 貘 +- 鷺 +- 눕 +- 늑 +- 늫 +- 렷 +- 벚 +- 뻗 +- 얌 +- 얗 +- 왓 
+- 짖 +- Ư +- 唁 +- 圀 +- 鐔 +- 륜 +- 뻑 +- 쓱 +- 왤 +- 滘 +- 濑 +- 岷 +- 疋 +- 蓑 +- 譚 +- 铯 +- 毘 +- 諦 +- 襁 +- 讴 +- 鄞 +- 緣 +- 膘 +- 禰 +- 泮 +- 璎 +- 莊 +- 蔺 +- 裨 +- 陂 +- 馗 +- ڦ +- ಃ +- ጩ +- 渫 +- 溧 +- 獠 +- 祢 +- 诌 +- 赅 +- 괌 +- 렘 +- 렙 +- ኚ +- Ủ +- 兇 +- 彿 +- 荠 +- 谀 +- 댈 +- 룡 +- 륙 +- 및 +- 뿜 +- 셈 +- 읍 +- 찡 +- 毬 +- 辋 +- 箩 +- 饅 +- 拚 +- 紆 +- 葚 +- 儺 +- 籾 +- 菀 +- 呤 +- 煸 +- 琲 +- 胍 +- 玳 +- 谴 +- 镉 +- 俎 +- 洵 +- 锲 +- 颉 +- 僱 +- 柘 +- 栎 +- 疝 +- 萢 +- 鑽 +- 骶 +- 〆 +- 儘 +- 汩 +- 腭 +- Փ +- 佈 +- 掮 +- 梏 +- 歙 +- 毽 +- 涿 +- 矬 +- 엮 +- Û +- Ѐ +- ዣ +- ᱡ +- ᱦ +- 喬 +- 嫲 +- 嬌 +- 懶 +- 筲 +- 糅 +- 辭 +- 霭 +- 낑 +- 뎌 +- 뛴 +- 봅 +- 솜 +- 엣 +- 왁 +- 찹 +- 칫 +- 懼 +- 禎 +- ဌ +- 娆 +- 鬚 +- 荥 +- 笏 +- 嶌 +- 癣 +- 攣 +- 鍬 +- 嘌 +- 捌 +- 孑 +- 淦 +- 瑁 +- 硼 +- 擾 +- 泓 +- 閾 +- 楯 +- 蝕 +- ゐ +- 哙 +- 姝 +- 孖 +- 盂 +- 胄 +- Ɓ +- ଃ +- ഊ +- ዌ +- ፆ +- 倜 +- 卅 +- 卍 +- 柩 +- 鲅 +- 갛 +- 껑 +- 껜 +- 륭 +- 뭇 +- 슝 +- 싯 +- 쏴 +- 잎 +- 콧 +- 팜 +- 펀 +- ඞ +- 婀 +- 톨 +- 洙 +- 硤 +- 梠 +- 锆 +- 筧 +- 鵤 +- 菖 +- 邬 +- 軋 +- 栉 +- 忪 +- 桎 +- 筠 +- 脍 +- 锃 +- 佔 +- 儆 +- 掬 +- 旮 +- 荊 +- ᱹ +- 隽 +- 饴 +- 훑 +- Ċ +- Է +- ቬ +- ጧ +- 奂 +- 滷 +- 癮 +- 蝼 +- 슥 +- 쏙 +- 왈 +- 팥 +- 핥 +- Ը +- 乸 +- 擤 +- 縱 +- 铍 +- 멧 +- Ў +- 嘹 +- 埴 +- 悅 +- 欅 +- 谒 +- 鴛 +- ឲ +- 嘭 +- 箪 +- 鴦 +- 恫 +- 覃 +- 穎 +- 郜 +- 韋 +- 僭 +- 痙 +- 邁 +- 哌 +- 疇 +- 惇 +- 侗 +- 箒 +- 埂 +- 讶 +- 邺 +- 鲹 +- 稞 +- 蒡 +- 賴 +- Զ +- 绶 +- 贖 +- 铱 +- Ҭ +- 亳 +- 坩 +- 柒 +- 纰 +- 觸 +- Ɗ +- ፏ +- ⋯ +- 劑 +- 擴 +- 殲 +- 溏 +- 茯 +- 깍 +- 붐 +- 뻣 +- 샹 +- 줍 +- 쯔 +- 펠 +- 敕 +- 깬 +- 꾹 +- 뮬 +- 빰 +- 숍 +- 즙 +- 쭝 +- 쾅 +- 퀸 +- 킵 +- 펭 +- 헝 +- 俥 +- 滂 +- 瘘 +- 캘 +- 嗪 +- 衞 +- 睢 +- 铣 +- 韮 +- 翦 +- 娠 +- 旛 +- 翫 +- 蕈 +- 譯 +- 吽 +- 囿 +- ͘ +- 讚 +- 钹 +- Ƙ +- ǔ +- Ұ +- 阕 +- 阱 +- 麿 +- 쫑 +- 쿡 +- ဧ +- ሣ +- 嬤 +- 懺 +- 晁 +- 殓 +- 滾 +- 苻 +- 钯 +- 饬 +- 껐 +- 꿋 +- 덧 +- 뵈 +- 엌 +- 잌 +- Ë +- 沅 +- 瑩 +- 撻 +- 笥 +- 尕 +- 簀 +- 竈 +- 縷 +- 鮪 +- 糀 +- 謄 +- 侘 +- 鰭 +- 氩 +- 籐 +- 舖 +- 皺 +- 伫 +- 弭 +- 欸 +- 泙 +- Ύ +- 祚 +- 缢 +- 聒 +- 诰 +- 鄢 +- 鴫 +- 鹄 +- 齡 +- Ξ +- ಓ +- ᱰ +- 垓 +- 燉 +- 筈 +- 贊 +- 鬥 +- Ĩ +- Ź +- ഐ +- ඖ +- ᱲ +- 侩 +- 惱 +- 杓 +- 涪 +- 漿 +- 炆 +- 觥 +- 颍 +- 餵 +- 麾 +- 겜 +- 괘 +- 꿍 +- 돕 +- 돗 +- 딲 +- 릏 +- 맑 +- 뵙 +- 솥 +- 앚 +- 쫀 +- 쭘 +- 펄 +- Ũ +- Ӗ +- ┐ 
+- 榧 +- 癜 +- 缱 +- 饕 +- 驅 +- 鲇 +- 젯 +- 퍽 +- 갯 +- 憫 +- 爍 +- 鄧 +- 萘 +- 廼 +- 吁 +- 茲 +- 鉤 +- 埚 +- 墉 +- 慳 +- 泔 +- 猢 +- 瑭 +- 旌 +- 孱 +- 屆 +- 弐 +- 珉 +- 祗 +- 薑 +- Ŵ +- ʽ +- Њ +- ѐ +- ഛ +- ቾ +- Ị +- 唞 +- 囁 +- 搽 +- 旯 +- 绻 +- 腧 +- 膣 +- 謊 +- 谆 +- 谌 +- 蹤 +- 钽 +- 靂 +- 뚤 +- 몫 +- 삥 +- 웜 +- 줏 +- 쩰 +- 츤 +- 캉 +- 큘 +- 팸 +- 펍 +- 펫 +- 헨 +- 崑 +- 瑤 +- 痧 +- 轍 +- 顛 +- 飚 +- 鵑 +- 龅 +- 롬 +- 잣 +- 횐 +- ಛ +- ඍ +- 滯 +- 薹 +- 譴 +- 桿 +- 氤 +- 蹙 +- ÷ +- 鵯 +- 臘 +- 鲼 +- 甑 +- 鯵 +- 嬬 +- 婺 +- 杈 +- 鬶 +- 鲠 +- 鳶 +- 嬸 +- 骅 +- ઋ +- 啐 +- 嗫 +- 尴 +- 徉 +- 抻 +- 煬 +- 瓊 +- 祜 +- 虢 +- 鈿 +- 俶 +- 倌 +- 撳 +- 棗 +- 樸 +- 珲 +- 癬 +- 笪 +- 錶 +- 삘 +- 씽 +- ኡ +- ፔ +- 剱 +- 彎 +- 獰 +- 甕 +- 綁 +- 腍 +- 芡 +- 薈 +- 蜆 +- 逑 +- 겔 +- 곈 +- 뤘 +- 뾰 +- 옇 +- 윽 +- 잭 +- 텃 +- 텼 +- 픔 +- 兖 +- 勳 +- 擰 +- 朧 +- 桤 +- 睏 +- 迩 +- 흙 +- 舩 +- 訛 +- 馮 +- 撈 +- 攬 +- 祎 +- 饒 +- 儚 +- ឍ +- 纈 +- 纐 +- 莳 +- 氘 +- 鑓 +- 葳 +- 莪 +- 儂 +- 繇 +- 苒 +- 恸 +- 舢 +- 刎 +- 徜 +- 桠 +- 繹 +- 芫 +- 杢 +- 榫 +- 氲 +- 睜 +- 箜 +- 篌 +- 貅 +- 閖 +- ᱼ +- 屙 +- 敘 +- 暈 +- 梾 +- 籤 +- 謬 +- 낌 +- 씁 +- 씸 +- Ĕ +- Ŭ +- Ӡ +- ቿ +- 傢 +- 呟 +- 悭 +- 溟 +- 璽 +- 瓴 +- 绔 +- 芩 +- 貔 +- 酞 +- 釀 +- 鍊 +- 鲃 +- 깁 +- 돔 +- 둡 +- 랴 +- 썪 +- 짭 +- 짼 +- 퀵 +- 폿 +- 홋 +- ఞ +- 潞 +- 癲 +- 鋲 +- 캄 +- 퐁 +- 瀾 +- 訶 +- 贄 +- ಢ +- 羰 +- 羸 +- 麂 +-  +- 剋 +- 滁 +- 瑳 +- 谶 +- 荸 +- 좀 +- 碲 +- 楳 +- 鲳 +- 煕 +- 戇 +- 溲 +- 膑 +- ޣ +- 壷 +- 擠 +- 聿 +- 伉 +- 滦 +- 睥 +- 繩 +- 脘 +- 荽 +- 崙 +- 攤 +- 柾 +- 砀 +- 籮 +- 蠡 +- 谧 +- ഔ +- 挲 +- 晔 +- 琮 +- 瘋 +- 藨 +- 钣 +- Ơ +- Ҟ +- ಔ +- ዑ +- ጵ +- Ứ +- ‫ +- ∈ +- 凇 +- 懑 +- 掙 +- 揼 +- 涜 +- 炔 +- 繪 +- 腟 +- 芾 +- 錘 +- 頒 +- 驟 +- 鹞 +- 꽝 +- 넬 +- 눴 +- 뜸 +- 밉 +- 뵀 +- 삔 +- 샜 +- 셌 +- 쑤 +- 엿 +- 콸 +- Ҥ +- Ҽ +- ဿ +- 捩 +- 禿 +- 竇 +- 譽 +- 郫 +- 둠 +- 뒹 +- 렐 +- 맴 +- 뽈 +- 첸 +- 拋 +- 淙 +- 盞 +- 丶 +- 寬 +- 獵 +- 窰 +- 舳 +- 註 +- ઃ +- 诅 +- 閤 +- 鴇 +- 嘧 +- 慄 +- 攝 +- 蝨 +- 鰯 +- 貶 +- 臾 +- 笕 +- 頷 +- 镌 +- 竪 +- 噤 +- 诘 +- 锗 +- 閘 +- 嗚 +- 壩 +- 撺 +- 晷 +- 桡 +- 棹 +- 耧 +- 趸 +- → +- 妁 +- 牯 +- 瓿 +- 笄 +- 蛄 +- 豈 +- 铖 +- 骐 +- 鷉 +- 鸾 +- 屍 +- 楸 +- 踽 +- 锒 +- 鲲 +- 섀 +- 켄 +- Ľ +- ӊ +- ጇ +- ጬ +- ◎ +- 嚨 +- 姒 +- 蒺 +- 蝸 +- 輻 +- 鸨 +- 齟 +- 깰 +- 끽 +- 낵 +- 눔 +- 닳 +- 밋 +- 밲 +- 벡 +- 뺨 +- 쉘 +- 슉 +- 쌔 +- 짚 +- 촛 +- 춧 +- 캣 +- 캥 
+- 튕 +- 휙 +- ਔ +- ఢ +- ೧ +- ᱯ +- 偲 +- 劍 +- 枱 +- 膩 +- 艷 +- 菸 +- 詛 +- 豇 +- 낡 +- 됬 +- 둣 +- 쌰 +- 撷 +- 贰 +- 躙 +- 辯 +- 遑 +- Յ +- 楮 +- 誼 +- 瞞 +- 祿 +- 绦 +- 廬 +- 皋 +- 妝 +- 鸮 +- 떤 +- 圳 +- 捲 +- 陝 +- 獺 +- 媾 +- 魍 +- 鼩 +- 鴈 +- ゝ +- 狲 +- 釐 +- 铄 +- 沮 +- 蘼 +- 邕 +- 钎 +- 靥 +- 鞣 +- ♥ +- 姘 +- 娣 +- 稹 +- 胳 +- 郅 +- 阆 +- 颱 +- 餮 +- ־ +- 儋 +- 厝 +- 肅 +- 誊 +- 騷 +- 숟 +- "\x92" +- "\x97" +- Ճ +- ՞ +- ኜ +- ዢ +- ዦ +- Ố +- 嘍 +- 揩 +- 曆 +- 栢 +- 潋 +- 箬 +- 糍 +- 遞 +- 髡 +- 鰐 +- 鲡 +- 鲢 +- 齬 +- 걀 +- 꺄 +- 꿉 +- 뗄 +- 빕 +- 얽 +- 읏 +- 잰 +- 쟀 +- 컥 +- 킷 +- 햐 +- 흩 +- ඣ +- ፑ +- ៣ +- 寳 +- 淖 +- 灑 +- 錨 +- 駁 +- 랠 +- 썽 +- 웰 +- 젬 +- 탬 +- 툼 +- 핍 +- 檳 +- 籃 +- 僑 +- 橼 +- 脹 +- 銛 +- 钜 +- 盱 +- 籲 +- 陲 +- 颔 +- 勖 +- 蓖 +- 郢 +- 檯 +- 粂 +- 獏 +- 燿 +- 祕 +- 鯊 +- 枡 +- 惕 +- 兌 +- 钼 +- 鞆 +- ● +- 綦 +- 蕗 +- 埜 +- 焘 +- 劭 +- 愎 +- 橈 +- 鎚 +- 锷 +- 鞏 +- 齁 +- 龇 +- ဍ +- ♂ +- 侪 +- 窠 +- 肪 +- 蜇 +- 逹 +- 邛 +- 卟 +- 撓 +- 燐 +- 纖 +- 郓 +- 闱 +- 餞 +- 鹓 +- 俬 +- 呔 +- 澧 +- 燴 +- 犍 +- 羧 +- 葶 +- 谲 +- ¬ +- Ć +- ː +- ̓ +- ڱ +- ۚ +- ඪ +- ፁ +- ᱪ +- Ẩ +- 呎 +- 哣 +- 噃 +- 搣 +- 淒 +- 苕 +- 萋 +- 襪 +- 訇 +- 諄 +- 謢 +- 邳 +- 鉈 +- 鴉 +- 鸻 +- 갭 +- 곶 +- 넹 +- 뗐 +- 룽 +- 맷 +- 샾 +- 쐈 +- 쨍 +- 챠 +- 컹 +- 튠 +- 푠 +- ˮ +- Ί +- Ҿ +- ޛ +- ዟ +- 悗 +- 耒 +- 躾 +- 鏟 +- 閂 +- 곁 +- 맏 +- 뮌 +- 찻 +- 睪 +- 礦 +- 筺 +- 艱 +- 賬 +- 镕 +- 蕕 +- 炝 +- 聾 +- 逡 +- ฌ +- 瀋 +- 詭 +- 鲣 +- 侉 +- 埙 +- 慟 +- 蜉 +- 钡 +- 輯 +- 諧 +- 吳 +- 鐙 +- 陛 +- 撹 +- 苧 +- 劔 +- 濛 +- 齧 +- Ժ +- 剝 +- 吩 +- 徕 +- 镓 +- ൌ +- 佻 +- 嚅 +- 岘 +- 窨 +- 跬 +- 銜 +- 骢 +- 魉 +- 鰓 +- 㩒 +- 嬗 +- 旎 +- 旖 +- 氫 +- 洄 +- 牺 +- 篙 +- 舂 +- 闌 +- 飄 +- Ծ +- ḿ +- 仞 +- 嘯 +- 噓 +- 囹 +- 圄 +- 岿 +- 恁 +- 揦 +- 殚 +- 沆 +- 簕 +- 莠 +- 莼 +- 萊 +- 鰆 +- ɔ +- ≡ +- 砣 +- 辇 +- 궐 +- 뽐 +- "\x84" +- "\x9A" +- § +- Ť +- Ձ +- ۓ +- ጢ +- ፂ +- ፄ +- Ổ +- Ờ +- 僆 +- 棂 +- 獸 +- 瓏 +- 瘢 +- 聳 +- 荛 +- 衿 +- 遒 +- 鑰 +- 镬 +- 깽 +- 꺠 +- 끙 +- 늪 +- 댐 +- 딥 +- 딧 +- 랖 +- 룻 +- 릅 +- 봔 +- 봬 +- 뼛 +- 섣 +- 쉼 +- 싣 +- 쎘 +- 얏 +- 윌 +- 쥴 +- 쯧 +- 챈 +- 챌 +- 촥 +- 톰 +- 핬 +- 圪 +- 屜 +- 捽 +- 珮 +- 碛 +- 臟 +- 螫 +- 輋 +- 醪 +- 骠 +- 똠 +- 맸 +- 샬 +- 沔 +- 禊 +- 襦 +- 趄 +- 邇 +- 剜 +- 筍 +- 緞 +- 虧 +- 趔 +- 铩 +- 埒 +- 憚 +- 扦 +- 罟 +- 囮 +- 賈 +- 匁 +- 吲 +- 哚 +- 雎 +- 鄒 +- 飫 +- 縊 +- 讼 +- 濯 
+- 竊 +- 铕 +- 蓣 +- ๅ +- 潑 +- 瀉 +- Ґ +- ಊ +- 屓 +- 碣 +- 粳 +- 苈 +- 蕤 +- 誨 +- 跖 +- 騾 +- 魑 +- ⸺ +- 倅 +- 幇 +- 廂 +- 柺 +- 楡 +- 瀨 +- 狆 +- 籏 +- 籬 +- 跣 +- 锶 +- 镆 +- 韌 +- ΐ +- 佚 +- 汜 +- 牍 +- 牴 +- 癱 +- 蟬 +- 顏 +- ϋ +- ዉ +- ▪ +- 佥 +- 刿 +- 噙 +- 孭 +- 洮 +- 漲 +- 猷 +- 瓤 +- 疊 +- 癡 +- 矍 +- 硯 +- 稃 +- 讫 +- 贔 +- 꾀 +- 떵 +- 맬 +- 빳 +- 챕 +- "\x91" +- "\x96" +- ƴ +- ؔ +- ઔ +- ෲ +- ኣ +- ១ +- 啞 +- 夘 +- 嵋 +- 徬 +- 慾 +- 斂 +- 渌 +- 滌 +- 漯 +- 燶 +- 砵 +- 稟 +- 笤 +- 蓆 +- 螞 +- 覓 +- 諺 +- 诳 +- 谡 +- 踎 +- 躉 +- 鈔 +- 铋 +- 陞 +- 顼 +- 麪 +- 갬 +- 궜 +- 궤 +- 꿇 +- 덱 +- 떳 +- 띨 +- 룐 +- 몹 +- 삑 +- 슁 +- 쏭 +- 앰 +- 욤 +- 웩 +- 잦 +- 죙 +- 챘 +- 첵 +- 촘 +- 쿤 +- 킴 +- 텁 +- 퓸 +- 훼 +- ઍ +- 冧 +- 勐 +- 擸 +- 晉 +- 汨 +- 燊 +- 瘡 +- 癇 +- 眀 +- 鎹 +- 铷 +- 鯰 +- 鱒 +- 딤 +- 앓 +- 얍 +- 윷 +- 쟈 +- 팎 +- 囗 +- 諌 +- 鋤 +- 壆 +- 嶄 +- 碕 +- 綵 +- 闢 +- 鳐 +- 秣 +- 簗 +- 蕪 +- 氹 +- 兪 +- 恹 +- 鈕 +- 钇 +- 蝣 +- 軀 +- 畲 +- 埕 +- 潅 +- 瞼 +- 褄 +- 詈 +- 邏 +- 颶 +- 莟 +- 艙 +- 碩 +- 筅 +- 钐 +- 寤 +- 徭 +- 銑 +- 朊 +- 楝 +- 澩 +- 竽 +- 褌 +- 覲 +- 铑 +- 鵠 +- 捭 +- 哞 +- 墊 +- 忾 +- 杼 +- 玑 +- 砭 +- 芨 +- 菡 +- 锇 +- 埓 +- 壅 +- 峅 +- 崧 +- 徠 +- 悱 +- 瑧 +- 脩 +- 邙 +- 铨 +- 鱸 +- 鸩 +- ẅ +- 壢 +- 泫 +- 爰 +- 箝 +- 耄 +- 耋 +- ▲ +- 垭 +- 巒 +- 捯 +- 撿 +- 攋 +- 梼 +- 璟 +- 疖 +- 痦 +- 紥 +- 缬 +- 腈 +- 菝 +- 逓 +- 铒 +- 鬢 +- 魃 +- 뎁 +- ѝ +- 垚 +- 擲 +- 眦 +- 絃 +- 舄 +- 蘅 +- 隸 +- 髯 +- 鲛 +- 鹱 +- 뎠 +- 딛 +- 몄 +- 쨈 +- 휠 +- 휩 +- ȃ +- Ց +- ۃ +- ॠ +- ḅ +- ヮ +- 勻 +- 卌 +- 吋 +- 噸 +- 囝 +- 奀 +- 戆 +- 揈 +- 揞 +- 搲 +- 攏 +- 昶 +- 暅 +- 椶 +- 榲 +- 橞 +- 涠 +- 珙 +- 琬 +- 磚 +- 粝 +- 糰 +- 緬 +- 罫 +- 羈 +- 葜 +- 蒹 +- 蕁 +- 薷 +- 蠱 +- 襴 +- 轸 +- 邰 +- 鏈 +- 鬘 +- 龐 +- 긱 +- 꺽 +- 늠 +- 뎀 +- 딕 +- 띡 +- 뵐 +- 셧 +- 슌 +- 웍 +- 윳 +- 짙 +- 쫒 +- 텄 +- 헉 +- 헹 +- 훗 +- Ώ +- ♯ +- 刪 +- 妯 +- 廪 +- 瀟 +- 犧 +- 畦 +- 癪 +- 矽 +- 禪 +- 腴 +- 袢 +- 鉦 +- 鏝 +- 뽁 +- 켈 +- 휜 +- 沭 +- 漣 +- 磔 +- 蕩 +- ଐ +- 仟 +- 壟 +- 妪 +- 淝 +- 紓 +- 苴 +- 莜 +- 隴 +- 饌 +- 駭 +- 鹘 +- 黢 +- ઐ +- 浛 +- ㄟ +- 剷 +- 圻 +- 澹 +- 砻 +- 肄 +- 崂 +- 痾 +- 稗 +- 褻 +- 迨 +- 镧 +- 霰 +- 顰 +- 輦 +- 輛 +- 焔 +- 篭 +- 踐 +- 坻 +- 왜 +- 殯 +- 靄 +- 琨 +- 闖 +- 騭 +- 蝿 +- 頤 +- 厠 +- 夲 +- 嫪 +- 玘 +- 蘊 +- 黚 +- 黧 +- 媞 +- 咥 +- 嘜 +- 嵊 +- 椴 +- 濞 +- 緘 +- 藺 +- 蝮 +- 醴 +- 鉉 +- 鹗 +- 贲 +- ഃ +- ဠ +- 偈 +- 唪 +- 嗳 
+- 姶 +- 嫻 +- 孥 +- 崁 +- 彧 +- 徂 +- 枞 +- 狽 +- 皲 +- 紮 +- 缦 +- 莒 +- 裃 +- ṃ +- 穫 +- ゚ +- 喑 +- 摟 +- 擋 +- 氡 +- 篾 +- 絣 +- 绐 +- 聩 +- 蚶 +- 螟 +- 襞 +- 賁 +- 踟 +- 蹰 +- 鈷 +- 镛 +- 闾 +- 髂 +- 鲆 +- 齒 +- 쌋 +- 췌 +- ॊ +- ఔ +- ဩ +- ኅ +- ※ +- 俅 +- 唖 +- 尷 +- 洇 +- 澆 +- 绀 +- 蕭 +- 틋 +- ኻ +- 佷 +- 侈 +- 뉜 +- 쉑 +- 쑈 +- "\x8A" +- "\x9D" +- Ğ +- ǐ +- ಝ +- ೯ +- ඃ +- ฯ +- ဈ +- ሒ +- ኼ +- ጴ +- ៖ +- ៦ +- ᱬ +- ᱶ +- ᱸ +- □ +- 䁅 +- 呯 +- 唈 +- 唳 +- 喐 +- 嗄 +- 噹 +- 娌 +- 娛 +- 寢 +- 嶂 +- 恽 +- 慚 +- 懞 +- 懣 +- 抌 +- 攜 +- 曚 +- 枥 +- 柽 +- 樑 +- 樞 +- 樾 +- 牀 +- 狍 +- 稙 +- 繚 +- 舸 +- 芎 +- 衾 +- 訕 +- 豢 +- 躝 +- 轎 +- 酐 +- 鎏 +- 鏊 +- 钆 +- 钪 +- 钶 +- 雋 +- 饋 +- 鬠 +- 鸫 +- 龠 +- 갰 +- 겅 +- 곗 +- 곪 +- 굼 +- 낱 +- 냇 +- 넒 +- 닛 +- 댑 +- 덨 +- 듦 +- 땔 +- 떄 +- 뗀 +- 똔 +- 롷 +- 롹 +- 묽 +- 볐 +- 빢 +- 뼌 +- 뽂 +- 샛 +- 샥 +- 솟 +- 숄 +- 숑 +- 슛 +- 쐬 +- 쑨 +- 쓕 +- 앳 +- 얕 +- 옅 +- 웁 +- 윰 +- 쟨 +- 젼 +- 짹 +- 쫘 +- 쭌 +- 챗 +- 펩 +- 푯 +- 핌 +- 𢱕 +- 櫚 +- 煅 +- 甌 +- 莢 +- 驕 +- 髌 +- 랏 +- 쏜 +- 옐 +- 핼 +- ஔ +- 僖 +- 恊 +- 滙 +- 澍 +- 癢 +- 粿 +- 翹 +- 蔀 +- 蛸 +- 躓 +- 鏃 +- 飩 +- 髀 +- 吶 +- 垃 +- 巉 +- 巔 +- 怩 +- 搗 +- 楦 +- 琚 +- 篁 +- 脲 +- 誡 +- 阊 +- 鬻 +- 鸱 +- 叺 +- 湟 +- 頗 +- 魟 +- 狢 +- 畐 +- 畷 +- 椹 +- 諍 +- 醮 +- 鐸 +- 釗 +- 镗 +- 锴 +- ゙ +- 戕 +- 稈 +- 纒 +- 亓 +- 庹 +- 氖 +- 祉 +- 鉗 +- 础 +- 嚯 +- 堉 +- 桫 +- 椤 +- 楋 +- 瀣 +- 珧 +- 礬 +- 舾 +- 邾 +- 鵺 +- 鼱 +- 滢 +- 臍 +- 堝 +- 弍 +- 晞 +- 椁 +- 濺 +- 睄 +- 礇 +- 笫 +- 蠟 +- 鎂 +- 閨 +- 怏 +- 慷 +- 瀏 +- 綸 +- 罎 +- 閩 +- ጐ +- ឫ +- 伧 +- 僂 +- 冼 +- 夔 +- 媠 +- 嵴 +- 幟 +- 畊 +- 磬 +- 窺 +- 簷 +- 胿 +- 臬 +- 蚵 +- 蹚 +- 鋏 +- 鏖 +- 霑 +- 騮 +- 鲔 +- 끅 +- ̋ +- Љ +- ѕ +- ២ +- 佶 +- 唻 +- 抾 +- 柞 +- 澱 +- 錙 +- 늄 +- 뒨 +- 믈 +- 콰 +- ± +- ¶ +- ʾ +- ̊ +- ଈ +- ଔ +- ஶ +- ኀ +- ጳ +- ጼ +- ጾ +- ጿ +- ፥ +- ឮ +- ០ +- ៤ +- ៨ +- ᱝ +- 㓤 +- 仝 +- 倧 +- 刄 +- 厍 +- 咇 +- 唥 +- 喏 +- 嗞 +- 囍 +- 圜 +- 埞 +- 塬 +- 塱 +- 墀 +- 墮 +- 壸 +- 婭 +- 岫 +- 崃 +- 崋 +- 弶 +- 愜 +- 憊 +- 挈 +- 揜 +- 摮 +- 擘 +- 擱 +- 昐 +- 枘 +- 枳 +- 椚 +- 槁 +- 樒 +- 櫸 +- 淸 +- 溘 +- 溼 +- 燙 +- 痈 +- 硎 +- 篩 +- 簒 +- 縝 +- 縻 +- 纔 +- 荪 +- 葯 +- 蚴 +- 蛏 +- 蛻 +- 蝰 +- 蠹 +- 裇 +- 裥 +- 誅 +- 豎 +- 貲 +- 踭 +- 踴 +- 蹌 +- 蹣 +- 鑿 +- 铼 +- 锺 +- 镲 +- 颙 +- 駈 +- 駱 +- 鮓 +- 鮟 +- 鯇 +- 鰈 +- 鰜 +- 鱇 +- 鲀 +- 鵲 +- 괭 +- 굘 +- 긌 +- 깟 +- 깻 
+- 꼿 +- 넙 +- 뉸 +- 뗘 +- 뚠 +- 띃 +- 렜 +- 룔 +- 멓 +- 멱 +- 뭄 +- 뺌 +- 뿅 +- 뿍 +- 숯 +- 슘 +- 쎌 +- 얠 +- 옌 +- 잴 +- 쩝 +- 쳇 +- 췻 +- 츰 +- 캇 +- 켔 +- 퀭 +- 킁 +- 탤 +- 튄 +- 팰 +- 혓 +- 홧 +- 훤 +- 휑 +- 힉 +- ȇ +- ˎ +- 躼 +- 곯 +- 덫 +- 햅 +- Ě +- 狹 +- 睚 +- 觜 +- 겡 +- 셴 +- 쌜 +- ಋ +- 壘 +- 孛 +- 忟 +- 旻 +- 榑 +- 煥 +- 狎 +- 眇 +- 罘 +- 胪 +- 脛 +- 舨 +- 镒 +- 餼 +- 馐 +- 촤 +- 歃 +- 禛 +- ♭ +- 垌 +- 尭 +- 晝 +- 楹 +- 滄 +- 砜 +- 菟 +- 蒨 +- 藷 +- 鏢 +- 鐡 +- 頌 +- 馕 +- 鰲 +- 鳉 +- 豕 +- 蜘 +- ㄧ +- 嵬 +- 忸 +- 暝 +- 盜 +- 螣 +- 謐 +- 嘬 +- 圾 +- 洟 +- 舁 +- 醗 +- 铟 +- 颏 +- 黌 +- 栂 +- 瘻 +- 瞋 +- 窣 +- 窸 +- 絋 +- 鶫 +- 銕 +- √ +- 蚺 +- 蹕 +- 窶 +- 牻 +- 擯 +- 愼 +- 榾 +- 癩 +- 筜 +- 筼 +- 聶 +- 蟄 +- 鍔 +- 頽 +- 黠 +- 梛 +- 莨 +- 骈 +- 鸊 +- 倬 +- 唛 +- 嗶 +- 嘰 +- 嚐 +- 媪 +- 徛 +- 憺 +- 揠 +- 甦 +- 翃 +- 肭 +- 芗 +- 茔 +- 萁 +- 葎 +- 蓠 +- 蛉 +- 蜮 +- 贶 +- 趖 +- 轡 +- 釁 +- 鈉 +- 隹 +- 餉 +- 饪 +- 騅 +- 橹 +- 篳 +- 蟯 +- Ъ +- ឦ +- ∠ +- 〝 +- ヰ +- 俠 +- 冪 +- 埤 +- 墘 +- 嬅 +- 峤 +- 巿 +- 扻 +- 搦 +- 攔 +- 昰 +- 枋 +- 槊 +- 渑 +- 燵 +- 猊 +- 簋 +- 肼 +- 臚 +- 艄 +- 茆 +- 茼 +- 菘 +- 菪 +- 諷 +- 譟 +- 躑 +- 輓 +- 郯 +- 郾 +- 鄄 +- 鋆 +- 铳 +- 锨 +- 閏 +- 颪 +- 馭 +- 髒 +- 鱔 +- ǰ +- ॲ +- ୫ +- ឪ +- 愠 +- 歛 +- 皚 +- 硲 +- 稣 +- 蛚 +- 輾 +- 馩 +- 꽌 +- 롸 +- 밧 +- 뱄 +- "\x80" +- "\x99" +- Ï +- ǒ +- ȏ +- ̌ +- ̔ +- ̟ +- Ӯ +- ٗ +- ۗ +- ޡ +- ऍ +- ॆ +- ॔ +- ૢ +- ୪ +- ೦ +- ೨ +- ೩ +- ෳ +- ቮ +- ጹ +- ḉ +- ḫ +- ṇ +- ṉ +- ẃ +- Ạ +- Ầ +- Ậ +- Ệ +- Ự +- ⁠ +- ∙ +- ⊙ +- ◯ +- ⸻ +- 〟 +- 㧎 +- 䒏 +- 䒐 +- 佮 +- 俟 +- 倯 +- 倻 +- 偢 +- 僥 +- 儍 +- 凊 +- 匱 +- 叁 +- 嗐 +- 嘏 +- 噁 +- 囪 +- 埵 +- 堯 +- 奓 +- 姍 +- 娉 +- 尣 +- 弸 +- 怍 +- 悽 +- 挜 +- 挹 +- 揗 +- 摈 +- 斃 +- 昉 +- 曵 +- 梣 +- 棧 +- 楫 +- 橐 +- 欒 +- 殭 +- 殳 +- 洎 +- 浐 +- 涷 +- 玭 +- 瑋 +- 璁 +- 璈 +- 甙 +- 畈 +- 瘓 +- 眙 +- 硃 +- 碚 +- 磧 +- 竅 +- 筥 +- 篦 +- 粲 +- 糒 +- 繻 +- 罅 +- 胗 +- 舺 +- 艋 +- 艤 +- 艪 +- 艸 +- 茕 +- 荜 +- 莵 +- 菉 +- 菔 +- 萠 +- 蓺 +- 蔣 +- 蘂 +- 蟥 +- 覯 +- 訐 +- 訥 +- 詡 +- 誣 +- 誦 +- 誻 +- 謨 +- 譖 +- 豂 +- 赧 +- 趌 +- 趺 +- 躅 +- 軚 +- 輘 +- 輷 +- 迾 +- 鄣 +- 醌 +- 鎅 +- 鎔 +- 鎝 +- 鏨 +- 鑄 +- 鑲 +- 钋 +- 闕 +- 陉 +- 頰 +- 餋 +- 餒 +- 餛 +- 馯 +- 骘 +- 鬍 +- 鬨 +- 魎 +- 鲮 +- 鲿 +- 鳫 +- 鵞 +- 鵡 +- 鶉 +- 鹛 +- 鼆 +- 鼐 +- 걜 +- 겋 +- 곌 +- 굥 +- 귐 +- 꽐 +- 꽥 +- 꿩 +- 끍 +- 냔 +- 냘 +- 냡 +- 넝 +- 넴 +- 놉 
+- 놋 +- 놘 +- 뇽 +- 뉩 +- 늉 +- 댜 +- 듈 +- 땋 +- 떰 +- 띤 +- 맽 +- 멩 +- 몀 +- 믕 +- 볕 +- 뵌 +- 빻 +- 뿡 +- 샨 +- 숀 +- 숴 +- 슾 +- 쌕 +- 쌨 +- 썅 +- 썜 +- 쎅 +- 쏼 +- 쒀 +- 씰 +- 옉 +- 옫 +- 웟 +- 읊 +- 읜 +- 좆 +- 짯 +- 쨋 +- 쨰 +- 쩄 +- 쮸 +- 촐 +- 캬 +- 켁 +- 켐 +- 켸 +- 콥 +- 쿱 +- 퉤 +- 튬 +- 팹 +- 퐉 +- 푤 +- 퓰 +- 픕 +- 휀 +- 𢳂 +- ʺ +- 꾜 +- 꿰 +- 쇳 +- 잽 +- Ǹ +- ೫ +- ኟ +- ṅ +- † +- 凈 +- 垪 +- 拵 +- 曷 +- 梘 +- 漚 +- 絚 +- 鯪 +- 넚 +- 쌉 +- 엡 +- ೮ +- 䢢 +- 佤 +- 凫 +- 煖 +- 獪 +- 瘆 +- 癆 +- 秬 +- 糋 +- 躹 +- 迳 +- 钌 +- 陜 +- 韪 +- 驛 +- 髙 +- 鹼 +- 갉 +- 뷴 +- 寃 +- 刳 +- 劏 +- 嚡 +- 杮 +- 槎 +- 槤 +- 樅 +- 沤 +- 炴 +- 煊 +- 熒 +- 珎 +- 璉 +- 痼 +- 簔 +- 苄 +- 苳 +- 菴 +- 蘚 +- 虬 +- 詆 +- 赍 +- 跶 +- 铪 +- 闔 +- 顱 +- 颢 +- 飜 +- 骹 +- 伜 +- 柝 +- 聟 +- 釵 +- 崆 +- 畋 +- 笸 +- 膾 +- 蕲 +- 靫 +- 頴 +- 髷 +- 鵙 +- 鸷 +- 錺 +- 蒟 +- 愴 +- 腘 +- 鯱 +- 샀 +- 끊 +- 굳 +- 拶 +- Ẓ +- 橄 +- Ṛ +- 惫 +- 咆 +- 옛 +- ఊ +- 싫 +- 밌 +- 雰 +- 괜 +- 떻 +- 뭔 +- 伥 +- 飪 +- 鬪 +- 鼈 +- 걔 +- 椙 +- 蒎 +- 锿 +- 鲵 +- 亰 +- 啻 +- 囂 +- 峁 +- 弖 +- 憮 +- 桴 +- 瓘 +- 瘧 +- 秭 +- 簓 +- 薖 +- 蝴 +- 谯 +- 趿 +- 镨 +- 闳 +- 馑 +- 骺 +- 鱚 +- 鲐 +- ఋ +- 咙 +- Ÿ +- ഋ +- ഢ +- ဋ +- 丨 +- 丱 +- 仱 +- 傈 +- 傩 +- 僳 +- 劊 +- 嗆 +- 嗹 +- 嘁 +- 岈 +- 嵖 +- 巖 +- 庠 +- 廸 +- 戔 +- 扂 +- 拏 +- 挾 +- 掗 +- 摰 +- 撣 +- 攑 +- 敍 +- 旃 +- 旒 +- 栻 +- 槓 +- 歕 +- 歨 +- 殂 +- 泠 +- 渀 +- 潲 +- 潷 +- 澀 +- 瓩 +- 甓 +- 疃 +- 癀 +- 癔 +- 竦 +- 筊 +- 篥 +- 籟 +- 籼 +- 糬 +- 緹 +- 縉 +- 縢 +- 膕 +- 臈 +- 臙 +- 艏 +- 苡 +- 莩 +- 蓊 +- 薀 +- 薜 +- 蘓 +- 裈 +- 褧 +- 覕 +- 諡 +- 謇 +- 诒 +- 诤 +- 贽 +- 軼 +- 迤 +- 逶 +- 邡 +- 醅 +- 釼 +- 錚 +- 鍚 +- 鐃 +- 鐐 +- 钺 +- 铇 +- 锕 +- 镎 +- 靑 +- 顫 +- 髖 +- 鰊 +- 鳎 +- 鳔 +- 鷽 +- 鼙 +- ኰ +- 䖙 +- 俢 +- 儉 +- 啍 +- 埲 +- 屄 +- 戥 +- 掞 +- 枦 +- 爿 +- 笳 +- 綢 +- 繄 +- 翕 +- 芘 +- 蛣 +- 豔 +- 蹐 +- 鉀 +- 闞 +- 鵪 +- 鸝 +- 됭 +- 싰 +- 옭 +- 좔 +- ̧ +- ☉ +- 떫 +- 뱌 +- 벘 +- 씉 +- 엷 +- 읐 +- Ƴ +- "\x81" +- "\x8D" +- "\x8E" +- "\x98" +- ¦ +- Ð +- Ù +- Ŋ +- ƒ +- Ǥ +- ǫ +- Ʉ +- ˊ +- ˋ +- ̕ +- ̱ +- ̲ +- ̶ +- Ψ +- Ђ +- Ћ +- Ќ +- Ѹ +- Ӌ +- Ӳ +- Ղ +- Ր +- ַ +- ּ +- ؓ +- ٓ +- ऩ +- ॐ +- ৗ +- ઼ +- ૧ +- ૮ +- ୗ +- ୨ +- ୯ +- ఝ +- ఱ +- ೪ +- ഝ +- ෴ +- ་ +- ན +- ሓ +- ሿ +- ቐ +- ቓ +- ቯ +- ዒ +- ዖ +- ዡ +- ጯ +- ፗ +- ៎ +- ៥ +- ៧ +- ḏ +- ḡ +- ḩ +- Ḫ +- ṁ +- ẞ +- Ắ +- Ể +- Ỉ +- Ỗ 
+- Ộ +- Ớ +- Ὑ +- ῖ +- ‰ +- ₹ +- ⃗ +- ↑ +- ↓ +- ∇ +- ∼ +- ≈ +- ▼ +- ☺ +- ✅ +- Ⱅ +- Ⱎ +- ゎ +- ゔ +- 㶶 +- 䁯 +- 䆀 +- 䱽 +- 䴕 +- 亍 +- 佉 +- 侷 +- 傃 +- 傉 +- 傱 +- 僉 +- 僊 +- 僮 +- 凩 +- 刋 +- 剎 +- 劖 +- 劼 +- 勰 +- 勼 +- 匏 +- 厴 +- 厶 +- 叅 +- 吿 +- 呓 +- 咹 +- 哓 +- 唂 +- 唎 +- 喟 +- 喲 +- 喼 +- 嗇 +- 嗿 +- 嚀 +- 嚒 +- 囑 +- 圷 +- 坜 +- 坵 +- 坼 +- 埆 +- 埭 +- 塢 +- 塭 +- 墁 +- 奬 +- 妗 +- 妠 +- 妡 +- 妣 +- 妤 +- 姹 +- 娗 +- 嫐 +- 嬶 +- 尓 +- 尙 +- 屢 +- 屣 +- 岀 +- 峄 +- 峇 +- 崞 +- 崮 +- 帙 +- 廍 +- 弌 +- 弢 +- 彥 +- 彳 +- 悒 +- 惝 +- 愔 +- 愷 +- 愾 +- 慤 +- 戞 +- 戽 +- 扴 +- 抆 +- 抔 +- 抦 +- 拃 +- 捹 +- 掁 +- 掕 +- 掼 +- 摙 +- 摳 +- 摵 +- 摷 +- 擏 +- 擷 +- 斝 +- 旳 +- 昃 +- 晡 +- 晧 +- 暸 +- 杙 +- 杣 +- 杻 +- 枧 +- 枰 +- 柸 +- 棔 +- 椏 +- 椟 +- 榙 +- 樁 +- 樗 +- 檪 +- 櫈 +- 殁 +- 殄 +- 毵 +- 氾 +- 汱 +- 洐 +- 洹 +- 淥 +- 淪 +- 湫 +- 溋 +- 溷 +- 滸 +- 潴 +- 澗 +- 澶 +- 濉 +- 瀍 +- 烝 +- 煆 +- 熈 +- 燜 +- 爨 +- 牾 +- 猗 +- 玜 +- 玠 +- 珒 +- 珣 +- 瑨 +- 瓠 +- 疔 +- 疠 +- 疴 +- 睞 +- 硚 +- 硨 +- 磲 +- 礑 +- 祆 +- 禳 +- 穐 +- 笮 +- 筌 +- 筿 +- 箆 +- 箓 +- 篋 +- 簟 +- 簫 +- 糉 +- 絅 +- 綖 +- 綯 +- 綷 +- 綽 +- 緁 +- 緲 +- 縒 +- 縹 +- 繆 +- 繏 +- 缑 +- 缙 +- 罨 +- 羂 +- 羣 +- 羶 +- 翙 +- 聼 +- 肟 +- 艀 +- 艹 +- 芰 +- 芻 +- 苎 +- 茏 +- 茖 +- 茭 +- 莚 +- 莸 +- 莾 +- 萜 +- 萡 +- 蒽 +- 蓀 +- 蓁 +- 蓥 +- 蕘 +- 蕞 +- 薟 +- 藳 +- 蛲 +- 蜑 +- 蜞 +- 蝥 +- 螋 +- 蟇 +- 蟳 +- 衒 +- 衮 +- 袛 +- 袰 +- 裄 +- 裎 +- 褦 +- 褫 +- 襤 +- 觔 +- 觚 +- 詬 +- 諚 +- 諤 +- 謔 +- 譫 +- 讒 +- 诮 +- 谘 +- 谳 +- 貉 +- 貮 +- 赀 +- 赓 +- 赟 +- 踅 +- 蹺 +- 輊 +- 輟 +- 輳 +- 轾 +- 辎 +- 辶 +- 迍 +- 郃 +- 郗 +- 郛 +- 郧 +- 鄯 +- 醯 +- 釆 +- 釿 +- 鈪 +- 鉅 +- 鉎 +- 銥 +- 銳 +- 鎭 +- 鐇 +- 鐶 +- 鑛 +- 鑢 +- 钕 +- 钤 +- 钫 +- 钬 +- 钿 +- 铙 +- 铧 +- 铽 +- 锝 +- 锟 +- 镝 +- 镡 +- 闼 +- 隗 +- 雺 +- 霈 +- 韃 +- 韜 +- 韫 +- 餚 +- 餾 +- 饑 +- 馔 +- 駝 +- 驺 +- 驽 +- 骝 +- 髧 +- 鮒 +- 鮖 +- 鯣 +- 鰌 +- 鰒 +- 鱉 +- 鱷 +- 鲗 +- 鲩 +- 鲾 +- 鳀 +- 鳊 +- 鳚 +- 鳜 +- 鳢 +- 鳰 +- 鴿 +- 鹀 +- 鹟 +- 麭 +- 黩 +- 鼢 +- 鼷 +- 齴 +- 龢 +- ꞌ +- 갼 +- 겝 +- 겻 +- 곘 +- 괍 +- 괏 +- 궂 +- 귯 +- 꺅 +- 꺤 +- 껒 +- 꽨 +- 꽷 +- 꾿 +- 뀨 +- 끕 +- 낏 +- 넵 +- 녘 +- 놥 +- 눟 +- 늗 +- 늣 +- 닪 +- 닸 +- 덷 +- 뎄 +- 뎡 +- 돚 +- 됑 +- 듭 +- 듶 +- 딫 +- 땍 +- 땟 +- 떈 +- 떱 +- 뗏 +- 똫 +- 뙤 +- 뚸 +- 뛌 +- 뜹 +- 띌 +- 띔 +- 럤 +- 롄 +- 뤃 +- 뤼 +- 맀 +- 먀 +- 먙 +- 멨 +- 묀 +- 뭍 +- 뭥 +- 뭬 +- 뭰 +- 믁 +- 믐 
+- 밈 +- 밎 +- 뱁 +- 뱡 +- 벋 +- 벛 +- 볻 +- 봥 +- 뵤 +- 붜 +- 뷘 +- 뺘 +- 뻬 +- 뼘 +- 뼜 +- 뿕 +- 뿟 +- 쁩 +- 쁸 +- 삣 +- 삧 +- 삯 +- 셤 +- 셥 +- 셸 +- 솝 +- 솨 +- 쇽 +- 쉪 +- 쉭 +- 쌘 +- 쎼 +- 쏸 +- 쐐 +- 쐴 +- 쑬 +- 쒯 +- 씃 +- 앎 +- 앏 +- 앝 +- 얉 +- 얋 +- 얐 +- 얬 +- 옙 +- 옜 +- 왝 +- 왯 +- 윅 +- 읒 +- 잿 +- 쟝 +- 젭 +- 젱 +- 졀 +- 좃 +- 좇 +- 좐 +- 죈 +- 죵 +- 줜 +- 줴 +- 쥔 +- 쥘 +- 짢 +- 쩟 +- ํ +- ଋ +- 닯 +- 뜀 +- 룟 +- 뭡 +- 쌂 +- Ѓ +- ઁ +- ః +- 摅 +- 欉 +- 洶 +- 牘 +- 篪 +- 繃 +- 艻 +- 跩 +- 鷯 +- 뫼 +- 왐 +- ఙ +- 仂 +- 俍 +- 埖 +- 堊 +- 尅 +- 悾 +- 掅 +- 摎 +- 柢 +- 樨 +- 橂 +- 歁 +- 殕 +- 狯 +- 珜 +- 珰 +- 瑆 +- 畭 +- 穰 +- 箨 +- 缳 +- 罌 +- 耨 +- 臌 +- 苁 +- 萄 +- 蠓 +- 蠖 +- 蠶 +- 褸 +- 襠 +- 觴 +- 謖 +- 酺 +- 铊 +- 駟 +- 鰍 +- 黾 +- ♡ +- 僢 +- 剉 +- 噘 +- 圞 +- 圹 +- 岨 +- 弉 +- 昺 +- 橢 +- 濶 +- 焮 +- 禩 +- 秡 +- 秫 +- 笊 +- 茌 +- 蕖 +- 藹 +- 螅 +- 袆 +- 郏 +- 鉋 +- 銹 +- 铌 +- 駸 +- 墻 +- 悋 +- 暱 +- 櫨 +- 浬 +- 筬 +- 糺 +- 紑 +- 誂 +- 賎 +- 跹 +- 蹁 +- 蹠 +- 鰤 +- ằ +- 蒻 +- 鞨 +- ắ +- 笵 +- +- +- +init: null +input_size: null +ctc_conf: + dropout_rate: 0.0 + ctc_type: builtin + reduce: true + ignore_nan_grad: null + zero_infinity: true + brctc_risk_strategy: exp + brctc_group_strategy: end + brctc_risk_factor: 0.0 +use_preprocessor: true +token_type: bpe +bpemodel: data/token_list/bpe_unigram50000/bpe.model +non_linguistic_symbols: null +cleaner: null +g2p: null +speech_volume_normalize: null +rir_scp: null +rir_apply_prob: 1.0 +noise_scp: null +noise_apply_prob: 1.0 +noise_db_range: '13_15' +short_noise_thres: 0.5 +frontend: default +frontend_conf: + n_fft: 512 + win_length: 400 + hop_length: 160 + fs: 16k +specaug: specaug +specaug_conf: + apply_time_warp: false + time_warp_window: 5 + time_warp_mode: bicubic + apply_freq_mask: true + freq_mask_width_range: + - 0 + - 27 + num_freq_mask: 2 + apply_time_mask: true + time_mask_width_ratio_range: + - 0.0 + - 0.05 + num_time_mask: 4 +normalize: global_mvn +normalize_conf: + stats_file: exp/s2t_stats_raw_bpe50000/train/feats_stats.npz +model: espnet +model_conf: + ctc_weight: 0.3 + lsm_weight: 0.1 + length_normalized_loss: false + sym_na: +preencoder: null 
+preencoder_conf: {} +encoder: e_branchformer +encoder_conf: + output_size: 768 + attention_heads: 12 + attention_layer_type: selfattn + pos_enc_layer_type: abs_pos + rel_pos_type: latest + attention_qk_norm: false + use_flash_attn: true + cgmlp_linear_units: 3072 + cgmlp_conv_kernel: 31 + use_linear_after_conv: false + gate_activation: identity + num_blocks: 9 + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + attention_dropout_rate: 0.1 + input_layer: conv2d + layer_drop_rate: 0.0 + linear_units: 3072 + positionwise_layer_type: linear + use_ffn: true + macaron_ffn: true + merge_conv_kernel: 31 +postencoder: null +postencoder_conf: {} +decoder: transformer +decoder_conf: + attention_heads: 12 + linear_units: 3072 + num_blocks: 9 + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + self_attention_dropout_rate: 0.1 + src_attention_dropout_rate: 0.1 + self_attention_qk_norm: false + src_attention_qk_norm: false + self_attention_use_flash_attn: true + src_attention_use_flash_attn: true +preprocessor: s2t +preprocessor_conf: + text_prev_name: text_prev + text_ctc_name: text_ctc + fs: 16000 + na_symbol: + speech_length: 30 + speech_resolution: 0.02 + speech_init_silence: 30 + text_prev_apply_prob: 0.5 + time_apply_prob: 0.5 + notime_symbol: + first_time_symbol: <0.00> + last_time_symbol: <30.00> +required: +- output_dir +- token_list +version: '202310' +distributed: true diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/acc.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/acc.png new file mode 100644 index 0000000000000000000000000000000000000000..e414c2e3bc190a12206068bb3c0f9a6a468993d7 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/acc.png differ diff --git 
a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/backward_time.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/backward_time.png new file mode 100644 index 0000000000000000000000000000000000000000..63b79499286820d0136d5e8bdc2a5066d3d6756e Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/backward_time.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/cer.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/cer.png new file mode 100644 index 0000000000000000000000000000000000000000..9c366bb98c5c9bc09c20d6ee2625199c34fa3451 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/cer.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/cer_ctc.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/cer_ctc.png new file mode 100644 index 0000000000000000000000000000000000000000..38b79337308f9302f46e33db2fe4b0062dc22a34 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/cer_ctc.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/clip.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/clip.png new file mode 100644 index 0000000000000000000000000000000000000000..789ed9ef23eeab1febcc37f3f4bab15d9acd8bce Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/clip.png differ diff --git 
a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/forward_time.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/forward_time.png new file mode 100644 index 0000000000000000000000000000000000000000..d54f64ddc161aea1784926d76008eddbcf5cc565 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/forward_time.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/gpu_max_cached_mem_GB.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/gpu_max_cached_mem_GB.png new file mode 100644 index 0000000000000000000000000000000000000000..fc526667db6c56d0bb8af60c04c16ebc1036ee90 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/gpu_max_cached_mem_GB.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/grad_norm.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/grad_norm.png new file mode 100644 index 0000000000000000000000000000000000000000..5d22c63d117b3ed3a11ca0566bfbfd1112ea5de5 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/grad_norm.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/iter_time.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/iter_time.png new file mode 100644 index 0000000000000000000000000000000000000000..936f3b5d62082379f5b1fbbca4df5c44a5dc4924 Binary files /dev/null and 
b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/iter_time.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss.png new file mode 100644 index 0000000000000000000000000000000000000000..2b84dc1b8e30670013e279f832ecadfa0098a825 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_att.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_att.png new file mode 100644 index 0000000000000000000000000000000000000000..4432400662ab46d985167c6dbf4a652f7ecab95d Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_att.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_ctc.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_ctc.png new file mode 100644 index 0000000000000000000000000000000000000000..c15d6cd4534616cde52fdecc4f70373d580f9e66 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_ctc.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_scale.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_scale.png new file mode 100644 index 0000000000000000000000000000000000000000..e3a15585bf5417ddec89d579bf915165642612ba Binary files /dev/null and 
b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/loss_scale.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/optim0_lr0.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/optim0_lr0.png new file mode 100644 index 0000000000000000000000000000000000000000..81674e3a65b9cfdd1da4edb6fae59580a585c1ce Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/optim0_lr0.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/optim_step_time.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/optim_step_time.png new file mode 100644 index 0000000000000000000000000000000000000000..45976239e4b0ac6924495b5afb8a1d8edb9ba8be Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/optim_step_time.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/train_time.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/train_time.png new file mode 100644 index 0000000000000000000000000000000000000000..88651ec4ca8996db83d84d50808aaec11fd01906 Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/train_time.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/wer.png b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/wer.png new file mode 100644 index 0000000000000000000000000000000000000000..5d502027ac02512e2500bd444fc27eca5b11c974 
Binary files /dev/null and b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/images/wer.png differ diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.1.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.1.log new file mode 100644 index 0000000000000000000000000000000000000000..19fe3d561c74fd64648b4eb1d1fb67546f036c4e --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.1.log @@ -0,0 +1,3053 @@ +# Running on gpub011.delta.ncsa.illinois.edu +# Started at Thu Feb 8 08:43:30 CST 2024 +# SLURMD_NODENAME=gpub011 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2955233 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1707576192 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2955233 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[011,016,034,076]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1707403392 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[011,016,034,076]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=2467443 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub011 +# 
SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_3e0c3ae9-034f-440b-9923-b21bb4cc8764 +GpuFreq=control_disabled +GpuFreq=control_disabled +GpuFreq=control_disabled +GpuFreq=control_disabled +/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_3e0c3ae9-034f-440b-9923-b21bb4cc8764 +/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_3e0c3ae9-034f-440b-9923-b21bb4cc8764 +/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf 
stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_3e0c3ae9-034f-440b-9923-b21bb4cc8764 +/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_3e0c3ae9-034f-440b-9923-b21bb4cc8764 +[gpub011:0/16] 2024-02-08 08:48:38,376 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 
+[gpub011:0/16] 2024-02-08 08:48:38,559 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. +[gpub011:0/16] 2024-02-08 08:48:38,600 (s2t:464) INFO: Vocabulary size: 50002 +[gpub011:0/16] 2024-02-08 08:48:49,773 (abs_task:1271) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub011:0/16] 2024-02-08 08:48:49,779 (abs_task:1272) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): 
Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, 
out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, 
bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): 
GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + 
) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + 
(linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, 
out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): 
Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): 
Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub011:0/16] 2024-02-08 08:48:49,779 (abs_task:1275) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub011:0/16] 2024-02-08 08:48:49,779 (abs_task:1276) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], 
warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub011:0/16] 2024-02-08 08:48:49,789 (abs_task:1285) INFO: Saving the configuration in exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub011:0/16] 2024-02-08 08:48:55,873 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 08:48:56,871 (abs_task:1663) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 08:48:56,871 (abs_task:1664) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub011:0/16] 2024-02-08 08:48:56,888 (abs_task:1665) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 08:49:11,517 (trainer:168) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub011:2467529:2467529 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2467529:2467529 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2467529:2467529 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2467529:2467529 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub011:0/16] 2024-02-08 08:49:17,259 (trainer:302) INFO: 34/45epoch started +[gpub011:0/16] 2024-02-08 08:49:17,301 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub011:0/16] 2024-02-08 08:49:35,783 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 08:49:39,462 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 08:49:39,462 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-08 08:49:39,465 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub034:4171618:4171618 [3] NCCL INFO cudaDriverVersion 12020 +gpub034:4171618:4171618 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.134<0> +gpub034:4171618:4171618 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub034:4171618:4171618 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub034:4171618:4171676 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub034:4171618:4171676 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub034:4171618:4171676 [3] NCCL INFO Using network AWS Libfabric +gpub034:4171618:4171676 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub034:4171618:4171676 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub034:4171618:4171676 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub034:4171618:4171676 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub034:4171618:4171676 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub034:4171618:4171676 [3] NCCL INFO Connected all rings +gpub034:4171618:4171676 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub034:4171618:4171676 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub034:4171618:4171676 [3] NCCL INFO Connected all trees +gpub034:4171618:4171676 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub034:4171618:4171676 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub034:4171618:4171676 [3] NCCL INFO comm 0x559d9e42e9a0 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub076:3117064:3117064 [3] NCCL INFO cudaDriverVersion 12020 +gpub076:3117064:3117064 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3117064:3117064 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3117064:3117064 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub076:3117064:3117119 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3117064:3117119 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3117064:3117119 [3] NCCL INFO Using network AWS Libfabric +gpub076:3117064:3117119 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub076:3117064:3117119 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub076:3117064:3117119 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub076:3117064:3117119 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub076:3117064:3117119 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub011:2467532:2467532 [3] NCCL INFO cudaDriverVersion 12020 +gpub011:2467532:2467532 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2467532:2467532 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2467532:2467532 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2467532:2467589 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2467532:2467589 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2467532:2467589 [3] NCCL INFO Using network AWS Libfabric +gpub011:2467532:2467589 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub011:2467532:2467589 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub011:2467532:2467589 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub011:2467532:2467589 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub011:2467532:2467589 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub016:2958313:2958313 [2] NCCL INFO cudaDriverVersion 12020 +gpub016:2958313:2958313 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:2958313:2958313 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub016:2958313:2958313 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub016:2958313:2958365 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub016:2958313:2958365 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub016:2958313:2958365 [2] NCCL INFO Using network AWS Libfabric +gpub016:2958313:2958365 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub016:2958313:2958365 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub016:2958313:2958365 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub016:2958313:2958365 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub016:2958313:2958365 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub034:4171616:4171616 [1] NCCL INFO cudaDriverVersion 12020 +gpub034:4171616:4171616 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.134<0> +gpub034:4171616:4171616 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub034:4171616:4171616 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub034:4171616:4171678 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub034:4171616:4171678 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub034:4171616:4171678 [1] NCCL INFO Using network AWS Libfabric +gpub034:4171616:4171678 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub034:4171616:4171678 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub034:4171616:4171678 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub034:4171616:4171678 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub034:4171616:4171678 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub076:3117064:3117119 [3] NCCL INFO Connected all rings +gpub076:3117064:3117119 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub076:3117064:3117119 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub076:3117064:3117119 [3] NCCL INFO Connected all trees +gpub076:3117064:3117119 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3117064:3117119 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub076:3117064:3117119 [3] NCCL INFO comm 0x55a55825a5f0 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub011:2467532:2467589 [3] NCCL INFO Connected all rings +gpub011:2467532:2467589 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub011:2467532:2467589 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub011:2467532:2467589 [3] NCCL INFO Connected all trees +gpub011:2467532:2467589 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2467532:2467589 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2467532:2467589 [3] NCCL INFO comm 0x55acc3bb5c50 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub016:2958313:2958365 [2] NCCL INFO Connected all rings +gpub016:2958313:2958365 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub016:2958313:2958365 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub016:2958313:2958365 [2] NCCL INFO Connected all trees +gpub016:2958313:2958365 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub016:2958313:2958365 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:2958313:2958365 [2] NCCL INFO comm 0x559baeb5c040 rank 6 nranks 16 cudaDev 2 
busId 85000 - Init COMPLETE +gpub034:4171616:4171678 [1] NCCL INFO Connected all rings +gpub034:4171616:4171678 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/AWS Libfabric/1 +gpub034:4171616:4171678 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub034:4171616:4171678 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub034:4171616:4171678 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub034:4171616:4171678 [1] NCCL INFO Connected all trees +gpub034:4171616:4171678 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub034:4171616:4171678 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub034:4171616:4171678 [1] NCCL INFO comm 0x55da68fdf000 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub076:3117062:3117062 [1] NCCL INFO cudaDriverVersion 12020 +gpub076:3117062:3117062 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3117062:3117062 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3117062:3117062 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub076:3117062:3117118 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3117062:3117118 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3117062:3117118 [1] NCCL INFO Using network AWS Libfabric +gpub076:3117062:3117118 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub076:3117062:3117118 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub076:3117062:3117118 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub076:3117062:3117118 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub076:3117062:3117118 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub034:4171615:4171615 [0] NCCL INFO cudaDriverVersion 12020 +gpub034:4171615:4171615 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.134<0> +gpub034:4171615:4171615 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub034:4171615:4171615 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub034:4171615:4171679 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub034:4171615:4171679 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub034:4171615:4171679 [0] NCCL INFO Using network AWS Libfabric +gpub034:4171615:4171679 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub034:4171615:4171679 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub034:4171615:4171679 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub034:4171615:4171679 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3117062:3117118 [1] NCCL INFO Connected all rings +gpub076:3117062:3117118 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub076:3117062:3117118 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub076:3117062:3117118 [1] NCCL INFO Connected all trees +gpub076:3117062:3117118 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3117062:3117118 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub076:3117062:3117118 [1] NCCL INFO comm 0x560de3e8e1c0 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub034:4171615:4171679 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub034:4171615:4171679 [0] 
NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub034:4171615:4171679 [0] NCCL INFO Connected all rings +gpub034:4171615:4171679 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO Connected all trees +gpub034:4171615:4171679 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3117061:3117061 [0] NCCL INFO cudaDriverVersion 12020 +gpub076:3117061:3117061 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3117061:3117061 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3117061:3117061 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub076:3117061:3117120 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3117061:3117120 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3117061:3117120 [0] NCCL INFO Using network AWS Libfabric +gpub076:3117061:3117120 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub076:3117061:3117120 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub076:3117061:3117120 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub076:3117061:3117120 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub034:4171615:4171679 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub034:4171615:4171679 [0] NCCL INFO comm 0x55dc8dbe4970 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub076:3117061:3117120 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3117061:3117120 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub076:3117061:3117120 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub076:3117061:3117120 [0] NCCL INFO Connected all rings +gpub076:3117061:3117120 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3117061:3117120 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3117061:3117120 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub076:3117061:3117120 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub076:3117061:3117120 [0] NCCL INFO Connected all trees +gpub076:3117061:3117120 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3117061:3117120 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub034:4171617:4171617 [2] NCCL INFO cudaDriverVersion 12020 +gpub034:4171617:4171617 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.134<0> +gpub034:4171617:4171617 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub034:4171617:4171617 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub034:4171617:4171677 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub034:4171617:4171677 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub034:4171617:4171677 [2] NCCL INFO Using network AWS Libfabric +gpub034:4171617:4171677 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub034:4171617:4171677 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub034:4171617:4171677 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub034:4171617:4171677 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub034:4171617:4171677 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub076:3117061:3117120 [0] NCCL INFO comm 0x561ec5e990b0 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub034:4171617:4171677 [2] NCCL INFO Connected all rings +gpub034:4171617:4171677 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub034:4171617:4171677 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub034:4171617:4171677 [2] NCCL INFO Connected all trees +gpub034:4171617:4171677 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub034:4171617:4171677 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub034:4171617:4171677 [2] NCCL INFO comm 0x558a7a00f580 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub011:2467529:2467588 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2467529:2467588 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2467529:2467588 [0] NCCL INFO Using network AWS Libfabric +gpub011:2467529:2467588 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub011:2467529:2467588 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub011:2467529:2467588 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub011:2467529:2467588 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub011:2467529:2467588 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub011:2467529:2467588 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2467529:2467588 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2467529:2467588 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub011:2467529:2467588 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub011:2467529:2467588 [0] NCCL INFO Connected all rings +gpub011:2467529:2467588 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub011:2467529:2467588 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2467529:2467588 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub011:2467529:2467588 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2467529:2467588 [0] NCCL INFO Connected all trees +gpub011:2467529:2467588 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2467529:2467588 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2467529:2467588 [0] NCCL INFO comm 0x55ab67717e30 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub076:3117063:3117063 [2] NCCL INFO cudaDriverVersion 12020 +gpub076:3117063:3117063 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3117063:3117063 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3117063:3117063 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub076:3117063:3117121 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3117063:3117121 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3117063:3117121 [2] NCCL INFO Using network AWS Libfabric +gpub076:3117063:3117121 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub076:3117063:3117121 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub076:3117063:3117121 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub076:3117063:3117121 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub076:3117063:3117121 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub076:3117063:3117121 [2] NCCL INFO Connected all rings +gpub076:3117063:3117121 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub076:3117063:3117121 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub076:3117063:3117121 [2] NCCL INFO Connected all trees +gpub076:3117063:3117121 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3117063:3117121 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub076:3117063:3117121 [2] NCCL INFO comm 0x55b047547ea0 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub011:2467530:2467530 [1] NCCL INFO cudaDriverVersion 12020 +gpub011:2467530:2467530 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2467530:2467530 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2467530:2467530 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2467530:2467590 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2467530:2467590 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2467530:2467590 [1] NCCL INFO Using network AWS Libfabric +gpub011:2467530:2467590 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:2467530:2467590 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub011:2467530:2467590 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub011:2467530:2467590 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub011:2467530:2467590 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub011:2467530:2467590 [1] NCCL INFO Connected all rings +gpub011:2467530:2467590 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub011:2467530:2467590 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub011:2467530:2467590 [1] NCCL INFO Connected all trees +gpub011:2467530:2467590 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2467530:2467590 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2467530:2467590 [1] NCCL INFO comm 0x563b04c01990 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub016:2958314:2958314 [3] NCCL INFO cudaDriverVersion 12020 +gpub016:2958314:2958314 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:2958314:2958314 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub016:2958314:2958314 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub016:2958314:2958366 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub016:2958314:2958366 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub016:2958314:2958366 [3] NCCL INFO Using network AWS Libfabric +gpub016:2958314:2958366 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub016:2958314:2958366 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub016:2958314:2958366 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub016:2958314:2958366 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub016:2958314:2958366 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub016:2958314:2958366 [3] NCCL INFO Connected all rings +gpub016:2958314:2958366 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub016:2958314:2958366 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub016:2958314:2958366 [3] NCCL INFO Connected all trees +gpub016:2958314:2958366 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub016:2958314:2958366 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:2958314:2958366 [3] NCCL INFO comm 0x558a91ead080 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub016:2958311:2958311 [0] NCCL INFO cudaDriverVersion 12020 +gpub016:2958311:2958311 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:2958311:2958311 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub016:2958311:2958311 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub016:2958311:2958367 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub016:2958311:2958367 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub016:2958311:2958367 [0] NCCL INFO Using network AWS Libfabric +gpub016:2958311:2958367 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub016:2958311:2958367 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub016:2958311:2958367 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub016:2958311:2958367 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub016:2958311:2958367 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2467531:2467531 [2] NCCL INFO cudaDriverVersion 12020 +gpub011:2467531:2467531 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2467531:2467531 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2467531:2467531 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2467531:2467591 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2467531:2467591 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2467531:2467591 [2] NCCL INFO Using network AWS Libfabric +gpub011:2467531:2467591 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:2467531:2467591 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub011:2467531:2467591 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub011:2467531:2467591 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub011:2467531:2467591 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub016:2958311:2958367 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub016:2958311:2958367 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub016:2958311:2958367 [0] NCCL INFO Connected all rings +gpub016:2958311:2958367 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub016:2958311:2958367 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/AWS Libfabric/1 +gpub016:2958311:2958367 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub016:2958311:2958367 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub016:2958311:2958367 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub016:2958311:2958367 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub016:2958311:2958367 [0] NCCL INFO Connected all trees +gpub016:2958311:2958367 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2467531:2467591 [2] NCCL INFO Connected all rings +gpub011:2467531:2467591 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub011:2467531:2467591 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub011:2467531:2467591 [2] NCCL INFO Connected all trees +gpub011:2467531:2467591 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2467531:2467591 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2467531:2467591 [2] NCCL INFO comm 0x560f9aadaa00 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub016:2958311:2958367 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:2958311:2958367 [0] NCCL INFO comm 
0x55ec6c5ae8f0 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub016:2958312:2958312 [1] NCCL INFO cudaDriverVersion 12020 +gpub016:2958312:2958312 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:2958312:2958312 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub016:2958312:2958312 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub016:2958312:2958368 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub016:2958312:2958368 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub016:2958312:2958368 [1] NCCL INFO Using network AWS Libfabric +gpub016:2958312:2958368 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub016:2958312:2958368 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub016:2958312:2958368 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub016:2958312:2958368 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub016:2958312:2958368 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub016:2958312:2958368 [1] NCCL INFO Connected all rings +gpub016:2958312:2958368 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub016:2958312:2958368 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/AWS Libfabric/1 +gpub016:2958312:2958368 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub016:2958312:2958368 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub016:2958312:2958368 [1] NCCL INFO Connected all trees +gpub016:2958312:2958368 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub016:2958312:2958368 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:2958312:2958368 [1] NCCL INFO comm 0x564a10a37d30 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +[gpub011:0/16] 2024-02-08 08:58:18,546 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub011:0/16] 2024-02-08 09:00:56,555 (trainer:762) INFO: 34epoch:train:1-100batch: iter_time=3.917, forward_time=0.424, loss_ctc=57.807, loss_att=56.924, acc=0.726, loss=57.189, backward_time=0.314, grad_norm=51.142, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.741e-04, train_time=6.992 +[gpub011:0/16] 2024-02-08 09:03:32,919 (trainer:762) INFO: 34epoch:train:101-200batch: iter_time=9.181e-05, forward_time=0.287, loss_ctc=51.602, loss_att=45.692, acc=0.748, loss=47.465, backward_time=0.294, grad_norm=42.536, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.741e-04, train_time=1.564 +[gpub011:0/16] 2024-02-08 09:05:54,706 (trainer:762) INFO: 34epoch:train:201-300batch: iter_time=9.684e-05, forward_time=0.284, loss_ctc=39.152, loss_att=35.811, acc=0.763, loss=36.813, backward_time=0.290, grad_norm=36.878, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.741e-04, train_time=1.418 +[gpub011:0/16] 2024-02-08 09:07:51,285 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-08 09:08:37,090 (trainer:762) INFO: 34epoch:train:301-400batch: iter_time=9.825e-05, forward_time=0.419, loss_ctc=49.050, loss_att=50.994, acc=0.742, loss=50.410, backward_time=0.369, grad_norm=43.273, clip=100.000, loss_scale=8.759e+33, optim_step_time=0.098, optim0_lr0=1.740e-04, train_time=1.624 +[gpub011:0/16] 2024-02-08 09:10:49,222 (trainer:762) INFO: 34epoch:train:401-500batch: iter_time=1.022e-04, forward_time=0.287, loss_ctc=46.960, loss_att=43.412, acc=0.746, loss=44.477, backward_time=0.293, grad_norm=45.787, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.740e-04, train_time=1.321 +[gpub011:0/16] 2024-02-08 09:13:19,134 (trainer:762) INFO: 34epoch:train:501-600batch: iter_time=8.744e-05, forward_time=0.314, loss_ctc=46.638, loss_att=47.204, acc=0.752, loss=47.034, backward_time=0.295, grad_norm=43.547, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.740e-04, train_time=1.499 +[gpub011:0/16] 2024-02-08 09:15:48,856 (trainer:762) INFO: 34epoch:train:601-700batch: iter_time=9.323e-05, forward_time=0.431, loss_ctc=45.248, loss_att=46.841, acc=0.763, loss=46.363, backward_time=0.351, grad_norm=38.819, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.740e-04, train_time=1.497 +[gpub011:0/16] 2024-02-08 09:18:31,739 (trainer:762) INFO: 34epoch:train:701-800batch: iter_time=9.563e-05, forward_time=0.285, loss_ctc=37.408, loss_att=35.732, acc=0.783, loss=36.235, backward_time=0.289, grad_norm=33.343, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.740e-04, train_time=1.628 +[gpub011:0/16] 2024-02-08 09:20:57,807 (trainer:762) INFO: 34epoch:train:801-900batch: iter_time=2.923e-04, forward_time=0.392, loss_ctc=47.334, loss_att=42.192, acc=0.755, loss=43.735, backward_time=0.325, grad_norm=43.544, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.739e-04, train_time=1.460 +[gpub011:0/16] 2024-02-08 09:23:23,746 (trainer:762) 
INFO: 34epoch:train:901-1000batch: iter_time=1.050e-04, forward_time=0.290, loss_ctc=52.460, loss_att=51.230, acc=0.747, loss=51.599, backward_time=0.295, grad_norm=44.979, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.739e-04, train_time=1.460 +[gpub011:0/16] 2024-02-08 09:25:39,568 (trainer:762) INFO: 34epoch:train:1001-1100batch: iter_time=8.523e-05, forward_time=0.288, loss_ctc=47.100, loss_att=40.059, acc=0.764, loss=42.171, backward_time=0.294, grad_norm=44.761, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.739e-04, train_time=1.358 +[gpub011:0/16] 2024-02-08 09:28:38,456 (trainer:762) INFO: 34epoch:train:1101-1200batch: iter_time=9.256e-05, forward_time=0.421, loss_ctc=46.862, loss_att=40.942, acc=0.750, loss=42.718, backward_time=0.319, grad_norm=40.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=1.739e-04, train_time=1.788 +[gpub011:0/16] 2024-02-08 09:29:58,768 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-08 09:30:17,698 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 09:30:21,260 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 09:30:21,261 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-08 09:30:21,264 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 09:37:11,389 (trainer:762) INFO: 34epoch:train:1201-1300batch: iter_time=3.692, forward_time=0.289, loss_ctc=48.461, loss_att=49.164, acc=0.748, loss=48.953, backward_time=0.295, grad_norm=42.217, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.739e-04, train_time=5.130 +[gpub011:0/16] 2024-02-08 09:39:54,821 (trainer:762) INFO: 34epoch:train:1301-1400batch: iter_time=8.706e-05, forward_time=0.436, loss_ctc=52.462, loss_att=50.271, acc=0.758, loss=50.928, backward_time=0.333, grad_norm=44.169, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.739e-04, train_time=1.634 +[gpub011:0/16] 2024-02-08 09:41:43,885 (trainer:762) INFO: 34epoch:train:1401-1500batch: iter_time=8.012e-05, forward_time=0.290, loss_ctc=43.703, loss_att=39.824, acc=0.759, loss=40.988, backward_time=0.298, grad_norm=37.089, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.738e-04, train_time=1.090 +[gpub011:0/16] 2024-02-08 09:44:01,141 (trainer:762) INFO: 
34epoch:train:1501-1600batch: iter_time=8.511e-05, forward_time=0.289, loss_ctc=42.698, loss_att=38.514, acc=0.767, loss=39.769, backward_time=0.293, grad_norm=37.721, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.738e-04, train_time=1.373 +[gpub011:0/16] 2024-02-08 09:46:22,786 (trainer:762) INFO: 34epoch:train:1601-1700batch: iter_time=7.998e-05, forward_time=0.292, loss_ctc=50.032, loss_att=51.786, acc=0.753, loss=51.259, backward_time=0.297, grad_norm=40.504, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.738e-04, train_time=1.416 +[gpub011:0/16] 2024-02-08 09:48:51,487 (trainer:762) INFO: 34epoch:train:1701-1800batch: iter_time=8.743e-05, forward_time=0.397, loss_ctc=46.769, loss_att=49.145, acc=0.739, loss=48.432, backward_time=0.353, grad_norm=42.038, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.738e-04, train_time=1.486 +[gpub011:0/16] 2024-02-08 09:50:53,490 (trainer:762) INFO: 34epoch:train:1801-1900batch: iter_time=8.805e-05, forward_time=0.292, loss_ctc=42.995, loss_att=43.106, acc=0.788, loss=43.073, backward_time=0.296, grad_norm=34.260, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.738e-04, train_time=1.220 +[gpub011:0/16] 2024-02-08 09:53:05,960 (trainer:762) INFO: 34epoch:train:1901-2000batch: iter_time=1.031e-04, forward_time=0.289, loss_ctc=41.566, loss_att=38.414, acc=0.790, loss=39.360, backward_time=0.295, grad_norm=33.733, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.738e-04, train_time=1.325 +[gpub011:0/16] 2024-02-08 09:55:17,518 (trainer:762) INFO: 34epoch:train:2001-2100batch: iter_time=8.695e-05, forward_time=0.288, loss_ctc=42.028, loss_att=40.652, acc=0.766, loss=41.065, backward_time=0.294, grad_norm=39.714, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.737e-04, train_time=1.315 +[gpub011:0/16] 2024-02-08 09:57:34,567 (trainer:762) INFO: 34epoch:train:2101-2200batch: 
iter_time=9.133e-05, forward_time=0.371, loss_ctc=48.683, loss_att=49.803, acc=0.752, loss=49.467, backward_time=0.400, grad_norm=38.810, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.737e-04, train_time=1.370 +[gpub011:0/16] 2024-02-08 09:59:45,494 (trainer:762) INFO: 34epoch:train:2201-2300batch: iter_time=9.174e-05, forward_time=0.294, loss_ctc=49.423, loss_att=46.353, acc=0.758, loss=47.274, backward_time=0.298, grad_norm=47.292, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.737e-04, train_time=1.308 +[gpub011:0/16] 2024-02-08 10:02:11,547 (trainer:762) INFO: 34epoch:train:2301-2400batch: iter_time=8.302e-05, forward_time=0.308, loss_ctc=43.031, loss_att=37.884, acc=0.775, loss=39.428, backward_time=0.293, grad_norm=37.875, clip=100.000, loss_scale=6.802e+33, optim_step_time=0.093, optim0_lr0=1.737e-04, train_time=1.462 +[gpub011:0/16] 2024-02-08 10:04:05,926 (trainer:762) INFO: 34epoch:train:2401-2500batch: iter_time=8.898e-05, forward_time=0.290, loss_ctc=44.679, loss_att=40.293, acc=0.767, loss=41.608, backward_time=0.297, grad_norm=37.737, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.737e-04, train_time=1.144 +[gpub011:0/16] 2024-02-08 10:04:25,954 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-08 10:04:44,796 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 10:04:48,376 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 10:04:48,376 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-08 10:04:48,379 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 10:12:42,460 (trainer:762) INFO: 34epoch:train:2501-2600batch: iter_time=3.682, forward_time=0.538, loss_ctc=56.430, loss_att=56.109, acc=0.736, loss=56.205, backward_time=0.345, grad_norm=46.739, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.114, optim0_lr0=1.737e-04, train_time=5.165 +[gpub011:0/16] 2024-02-08 10:14:54,586 (trainer:762) INFO: 34epoch:train:2601-2700batch: iter_time=9.379e-05, forward_time=0.419, loss_ctc=50.591, loss_att=45.636, acc=0.759, loss=47.123, backward_time=0.351, grad_norm=41.519, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.736e-04, train_time=1.321 +[gpub011:0/16] 2024-02-08 10:17:25,934 (trainer:762) INFO: 34epoch:train:2701-2800batch: iter_time=5.675e-04, forward_time=0.325, loss_ctc=38.399, loss_att=35.093, acc=0.772, loss=36.085, backward_time=0.293, grad_norm=34.804, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.736e-04, train_time=1.513 +[gpub011:0/16] 2024-02-08 10:19:35,840 (trainer:762) INFO: 
34epoch:train:2801-2900batch: iter_time=3.012e-04, forward_time=0.384, loss_ctc=47.733, loss_att=50.620, acc=0.752, loss=49.754, backward_time=0.369, grad_norm=40.597, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.736e-04, train_time=1.300 +[gpub011:0/16] 2024-02-08 10:21:53,702 (trainer:762) INFO: 34epoch:train:2901-3000batch: iter_time=1.011e-04, forward_time=0.446, loss_ctc=46.138, loss_att=42.707, acc=0.759, loss=43.736, backward_time=0.326, grad_norm=40.255, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.736e-04, train_time=1.377 +[gpub011:0/16] 2024-02-08 10:24:24,744 (trainer:762) INFO: 34epoch:train:3001-3100batch: iter_time=3.336e-04, forward_time=0.437, loss_ctc=45.841, loss_att=48.496, acc=0.759, loss=47.700, backward_time=0.359, grad_norm=42.261, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=1.736e-04, train_time=1.511 +[gpub011:0/16] 2024-02-08 10:26:54,811 (trainer:762) INFO: 34epoch:train:3101-3200batch: iter_time=4.855e-04, forward_time=0.369, loss_ctc=44.925, loss_att=45.815, acc=0.784, loss=45.548, backward_time=0.377, grad_norm=33.805, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.735e-04, train_time=1.500 +[gpub011:0/16] 2024-02-08 10:29:25,773 (trainer:762) INFO: 34epoch:train:3201-3300batch: iter_time=5.409e-04, forward_time=0.399, loss_ctc=36.835, loss_att=34.365, acc=0.795, loss=35.106, backward_time=0.344, grad_norm=31.396, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.735e-04, train_time=1.510 +[gpub011:0/16] 2024-02-08 10:31:32,216 (trainer:762) INFO: 34epoch:train:3301-3400batch: iter_time=0.002, forward_time=0.314, loss_ctc=45.509, loss_att=40.570, acc=0.768, loss=42.052, backward_time=0.315, grad_norm=40.013, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.735e-04, train_time=1.264 +[gpub011:0/16] 2024-02-08 10:34:03,401 (trainer:762) INFO: 34epoch:train:3401-3500batch: 
iter_time=9.146e-05, forward_time=0.412, loss_ctc=50.974, loss_att=53.973, acc=0.745, loss=53.074, backward_time=0.324, grad_norm=43.913, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.735e-04, train_time=1.512 +[gpub011:0/16] 2024-02-08 10:36:44,937 (trainer:762) INFO: 34epoch:train:3501-3600batch: iter_time=2.214e-04, forward_time=0.462, loss_ctc=44.514, loss_att=39.680, acc=0.772, loss=41.130, backward_time=0.319, grad_norm=41.659, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.735e-04, train_time=1.614 +[gpub011:0/16] 2024-02-08 10:39:23,796 (trainer:762) INFO: 34epoch:train:3601-3700batch: iter_time=3.231e-04, forward_time=0.397, loss_ctc=45.871, loss_att=39.620, acc=0.765, loss=41.495, backward_time=0.361, grad_norm=39.120, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.735e-04, train_time=1.589 +[gpub011:0/16] 2024-02-08 10:40:44,279 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub011:0/16] 2024-02-08 10:41:03,552 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 10:41:07,139 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 10:41:07,139 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-08 10:41:07,142 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 10:47:47,384 
(trainer:762) INFO: 34epoch:train:3701-3800batch: iter_time=3.222, forward_time=0.347, loss_ctc=47.634, loss_att=48.722, acc=0.752, loss=48.396, backward_time=0.303, grad_norm=41.910, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.734e-04, train_time=5.036 +[gpub011:0/16] 2024-02-08 10:50:06,148 (trainer:762) INFO: 34epoch:train:3801-3900batch: iter_time=8.706e-05, forward_time=0.376, loss_ctc=51.261, loss_att=48.072, acc=0.756, loss=49.028, backward_time=0.314, grad_norm=43.602, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.734e-04, train_time=1.387 +[gpub011:0/16] 2024-02-08 10:52:18,694 (trainer:762) INFO: 34epoch:train:3901-4000batch: iter_time=8.142e-05, forward_time=0.287, loss_ctc=43.236, loss_att=38.822, acc=0.757, loss=40.146, backward_time=0.293, grad_norm=37.535, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.734e-04, train_time=1.325 +[gpub011:0/16] 2024-02-08 10:54:40,022 (trainer:762) INFO: 34epoch:train:4001-4100batch: iter_time=2.328e-04, forward_time=0.385, loss_ctc=42.319, loss_att=38.158, acc=0.762, loss=39.407, backward_time=0.348, grad_norm=39.771, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.734e-04, train_time=1.414 +[gpub011:0/16] 2024-02-08 10:56:40,982 (trainer:762) INFO: 34epoch:train:4101-4200batch: iter_time=0.002, forward_time=0.300, loss_ctc=49.602, loss_att=51.052, acc=0.750, loss=50.617, backward_time=0.314, grad_norm=41.893, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.734e-04, train_time=1.209 +[gpub011:0/16] 2024-02-08 10:59:09,432 (trainer:762) INFO: 34epoch:train:4201-4300batch: iter_time=8.331e-05, forward_time=0.397, loss_ctc=46.178, loss_att=47.826, acc=0.735, loss=47.331, backward_time=0.334, grad_norm=42.283, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.734e-04, train_time=1.484 +[gpub011:0/16] 2024-02-08 11:00:34,606 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub011:0/16] 2024-02-08 11:01:35,599 (trainer:762) INFO: 34epoch:train:4301-4400batch: iter_time=8.578e-05, forward_time=0.291, loss_ctc=42.309, loss_att=42.707, acc=0.783, loss=42.587, backward_time=0.297, grad_norm=38.222, clip=100.000, loss_scale=8.129e+33, optim_step_time=0.093, optim0_lr0=1.733e-04, train_time=1.461 +[gpub011:0/16] 2024-02-08 11:03:50,045 (trainer:762) INFO: 34epoch:train:4401-4500batch: iter_time=8.064e-05, forward_time=0.424, loss_ctc=40.880, loss_att=39.462, acc=0.775, loss=39.887, backward_time=0.344, grad_norm=33.893, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.733e-04, train_time=1.344 +[gpub011:0/16] 2024-02-08 11:05:56,212 (trainer:762) INFO: 34epoch:train:4501-4600batch: iter_time=8.465e-05, forward_time=0.289, loss_ctc=41.082, loss_att=40.719, acc=0.763, loss=40.828, backward_time=0.292, grad_norm=39.343, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.733e-04, train_time=1.262 +[gpub011:0/16] 2024-02-08 11:08:33,779 (trainer:762) INFO: 34epoch:train:4601-4700batch: iter_time=8.608e-05, forward_time=0.418, loss_ctc=48.412, loss_att=47.644, acc=0.752, loss=47.875, backward_time=0.361, grad_norm=39.982, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.733e-04, train_time=1.575 +[gpub011:0/16] 2024-02-08 11:10:41,592 (trainer:762) INFO: 34epoch:train:4701-4800batch: iter_time=8.388e-05, forward_time=0.292, loss_ctc=48.737, loss_att=44.119, acc=0.763, loss=45.504, backward_time=0.297, grad_norm=44.341, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.733e-04, train_time=1.278 +[gpub011:0/16] 2024-02-08 11:13:06,819 (trainer:762) INFO: 34epoch:train:4801-4900batch: iter_time=1.603e-04, forward_time=0.411, loss_ctc=42.184, loss_att=37.075, acc=0.771, loss=38.608, backward_time=0.373, grad_norm=37.015, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.733e-04, train_time=1.452 
+[gpub011:0/16] 2024-02-08 11:15:07,131 (trainer:762) INFO: 34epoch:train:4901-5000batch: iter_time=8.033e-05, forward_time=0.311, loss_ctc=43.675, loss_att=40.627, acc=0.758, loss=41.542, backward_time=0.295, grad_norm=38.135, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.732e-04, train_time=1.203 +[gpub011:0/16] 2024-02-08 11:15:27,160 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub011:0/16] 2024-02-08 11:15:46,134 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 11:15:49,571 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 11:15:49,571 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-08 11:15:49,575 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 11:23:07,393 (trainer:762) INFO: 34epoch:train:5001-5100batch: iter_time=3.567, forward_time=0.292, loss_ctc=55.587, loss_att=56.381, acc=0.737, loss=56.143, backward_time=0.302, grad_norm=48.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.732e-04, train_time=4.802 +[gpub011:0/16] 2024-02-08 11:25:34,099 (trainer:762) INFO: 34epoch:train:5101-5200batch: iter_time=8.508e-05, forward_time=0.398, loss_ctc=49.834, loss_att=46.035, acc=0.758, loss=47.175, backward_time=0.335, grad_norm=39.137, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.098, optim0_lr0=1.732e-04, train_time=1.467 +[gpub011:0/16] 2024-02-08 11:27:29,245 (trainer:762) INFO: 34epoch:train:5201-5300batch: iter_time=8.233e-05, forward_time=0.289, loss_ctc=38.152, loss_att=35.159, acc=0.775, loss=36.057, backward_time=0.294, grad_norm=36.394, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.732e-04, train_time=1.151 +[gpub011:0/16] 2024-02-08 11:29:48,889 (trainer:762) INFO: 34epoch:train:5301-5400batch: iter_time=8.371e-05, forward_time=0.293, loss_ctc=47.360, loss_att=50.177, acc=0.754, loss=49.332, backward_time=0.299, grad_norm=41.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.732e-04, train_time=1.396 +[gpub011:0/16] 2024-02-08 11:32:07,144 (trainer:762) INFO: 34epoch:train:5401-5500batch: iter_time=8.074e-05, forward_time=0.394, loss_ctc=44.930, loss_att=42.614, acc=0.760, loss=43.309, backward_time=0.347, grad_norm=77.991, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.731e-04, train_time=1.382 +[gpub011:0/16] 2024-02-08 11:34:14,156 (trainer:762) INFO: 34epoch:train:5501-5600batch: iter_time=8.069e-05, forward_time=0.291, loss_ctc=44.842, loss_att=47.532, acc=0.763, loss=46.725, backward_time=0.296, grad_norm=39.318, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.731e-04, train_time=1.270 +[gpub011:0/16] 2024-02-08 11:36:27,876 (trainer:762) INFO: 34epoch:train:5601-5700batch: iter_time=8.124e-05, forward_time=0.319, loss_ctc=44.405, loss_att=45.570, acc=0.785, loss=45.220, backward_time=0.299, grad_norm=34.933, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.731e-04, train_time=1.337 +[gpub011:0/16] 2024-02-08 11:38:50,937 (trainer:762) INFO: 34epoch:train:5701-5800batch: iter_time=1.974e-04, forward_time=0.425, loss_ctc=36.976, loss_att=33.729, acc=0.799, loss=34.703, backward_time=0.345, grad_norm=31.330, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, 
optim0_lr0=1.731e-04, train_time=1.430 +[gpub011:0/16] 2024-02-08 11:41:03,768 (trainer:762) INFO: 34epoch:train:5801-5900batch: iter_time=8.250e-05, forward_time=0.290, loss_ctc=44.686, loss_att=41.360, acc=0.765, loss=42.358, backward_time=0.292, grad_norm=41.467, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.731e-04, train_time=1.328 +[gpub011:0/16] 2024-02-08 11:42:58,127 (trainer:762) INFO: 34epoch:train:5901-6000batch: iter_time=8.212e-05, forward_time=0.294, loss_ctc=50.012, loss_att=53.433, acc=0.746, loss=52.406, backward_time=0.300, grad_norm=44.386, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.731e-04, train_time=1.144 +[gpub011:0/16] 2024-02-08 11:45:18,664 (trainer:762) INFO: 34epoch:train:6001-6100batch: iter_time=2.193e-04, forward_time=0.343, loss_ctc=44.025, loss_att=39.716, acc=0.773, loss=41.009, backward_time=0.315, grad_norm=41.838, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.730e-04, train_time=1.405 +[gpub011:0/16] 2024-02-08 11:48:01,276 (trainer:762) INFO: 34epoch:train:6101-6200batch: iter_time=8.846e-05, forward_time=0.354, loss_ctc=45.594, loss_att=39.269, acc=0.766, loss=41.166, backward_time=0.327, grad_norm=39.862, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.730e-04, train_time=1.626 +[gpub011:0/16] 2024-02-08 11:49:16,011 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub011:0/16] 2024-02-08 11:49:35,383 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 11:49:38,794 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 11:49:38,795 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-08 11:49:38,800 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 11:56:03,397 (trainer:762) INFO: 34epoch:train:6201-6300batch: iter_time=3.647, forward_time=0.291, loss_ctc=47.672, loss_att=48.438, acc=0.751, loss=48.208, backward_time=0.297, grad_norm=41.568, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.730e-04, train_time=4.821 +[gpub011:0/16] 2024-02-08 11:58:09,375 (trainer:762) INFO: 34epoch:train:6301-6400batch: iter_time=8.840e-05, forward_time=0.289, loss_ctc=51.608, loss_att=47.766, acc=0.760, loss=48.919, backward_time=0.296, grad_norm=45.558, clip=100.000, loss_scale=7.425e+33, optim_step_time=0.094, optim0_lr0=1.730e-04, train_time=1.259 +[gpub011:0/16] 2024-02-08 11:59:32,883 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-08 12:00:40,172 (trainer:762) INFO: 34epoch:train:6401-6500batch: iter_time=8.737e-05, forward_time=0.475, loss_ctc=43.661, loss_att=39.120, acc=0.758, loss=40.482, backward_time=0.316, grad_norm=38.425, clip=100.000, loss_scale=7.815e+33, optim_step_time=0.104, optim0_lr0=1.730e-04, train_time=1.507 +[gpub011:0/16] 2024-02-08 12:02:38,295 (trainer:762) INFO: 34epoch:train:6501-6600batch: iter_time=7.987e-05, forward_time=0.289, loss_ctc=42.138, loss_att=38.016, acc=0.765, loss=39.252, backward_time=0.295, grad_norm=39.276, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.730e-04, train_time=1.182 +[gpub011:0/16] 2024-02-08 12:04:53,224 (trainer:762) INFO: 34epoch:train:6601-6700batch: iter_time=8.032e-05, forward_time=0.292, loss_ctc=49.094, loss_att=50.921, acc=0.749, loss=50.373, backward_time=0.297, grad_norm=42.384, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.729e-04, train_time=1.349 +[gpub011:0/16] 2024-02-08 12:06:45,762 (trainer:762) INFO: 34epoch:train:6701-6800batch: iter_time=8.909e-05, forward_time=0.291, loss_ctc=45.737, loss_att=47.131, acc=0.738, loss=46.713, backward_time=0.295, grad_norm=43.931, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.729e-04, train_time=1.126 +[gpub011:0/16] 2024-02-08 12:09:19,975 (trainer:762) INFO: 34epoch:train:6801-6900batch: iter_time=9.106e-05, forward_time=0.397, loss_ctc=42.379, loss_att=42.540, acc=0.787, loss=42.492, backward_time=0.339, grad_norm=43.953, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.729e-04, train_time=1.541 +[gpub011:0/16] 2024-02-08 12:11:25,464 (trainer:762) INFO: 34epoch:train:6901-7000batch: iter_time=8.795e-05, forward_time=0.289, loss_ctc=40.820, loss_att=39.107, acc=0.778, loss=39.621, backward_time=0.295, grad_norm=35.988, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.729e-04, train_time=1.256 +[gpub011:0/16] 2024-02-08 12:13:38,372 
(trainer:762) INFO: 34epoch:train:7001-7100batch: iter_time=8.593e-05, forward_time=0.288, loss_ctc=41.002, loss_att=40.998, acc=0.764, loss=40.999, backward_time=0.294, grad_norm=40.055, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.729e-04, train_time=1.328 +[gpub011:0/16] 2024-02-08 12:15:51,865 (trainer:762) INFO: 34epoch:train:7101-7200batch: iter_time=8.582e-05, forward_time=0.312, loss_ctc=48.269, loss_att=47.565, acc=0.751, loss=47.776, backward_time=0.299, grad_norm=38.525, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.729e-04, train_time=1.336 +[gpub011:0/16] 2024-02-08 12:18:05,841 (trainer:762) INFO: 34epoch:train:7201-7300batch: iter_time=8.468e-05, forward_time=0.411, loss_ctc=48.403, loss_att=42.956, acc=0.767, loss=44.590, backward_time=0.319, grad_norm=46.737, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.728e-04, train_time=1.338 +[gpub011:0/16] 2024-02-08 12:20:29,168 (trainer:762) INFO: 34epoch:train:7301-7400batch: iter_time=8.179e-05, forward_time=0.289, loss_ctc=42.530, loss_att=37.179, acc=0.772, loss=38.784, backward_time=0.293, grad_norm=37.979, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.728e-04, train_time=1.434 +[gpub011:0/16] 2024-02-08 12:22:26,405 (trainer:762) INFO: 34epoch:train:7401-7500batch: iter_time=8.242e-05, forward_time=0.289, loss_ctc=43.891, loss_att=40.427, acc=0.759, loss=41.466, backward_time=0.295, grad_norm=39.675, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.728e-04, train_time=1.171 +[gpub011:0/16] 2024-02-08 12:22:46,433 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-08 12:23:05,741 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 12:23:09,284 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 12:23:09,284 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-08 12:23:09,287 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 12:30:37,832 (trainer:762) INFO: 34epoch:train:7501-7600batch: iter_time=3.605, forward_time=0.403, loss_ctc=54.920, loss_att=55.708, acc=0.734, loss=55.472, backward_time=0.316, grad_norm=50.251, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.728e-04, train_time=4.915 +[gpub011:0/16] 2024-02-08 12:32:56,990 (trainer:762) INFO: 34epoch:train:7601-7700batch: iter_time=7.951e-05, forward_time=0.289, loss_ctc=50.122, loss_att=43.133, acc=0.758, loss=45.229, backward_time=0.294, grad_norm=40.466, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.728e-04, train_time=1.392 +[gpub011:0/16] 2024-02-08 12:34:52,146 (trainer:762) INFO: 34epoch:train:7701-7800batch: iter_time=8.233e-05, forward_time=0.287, loss_ctc=37.860, loss_att=34.350, acc=0.771, loss=35.403, backward_time=0.292, grad_norm=35.140, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.728e-04, train_time=1.151 +[gpub011:0/16] 2024-02-08 12:37:18,160 (trainer:762) INFO: 
34epoch:train:7801-7900batch: iter_time=9.180e-05, forward_time=0.395, loss_ctc=46.622, loss_att=49.043, acc=0.751, loss=48.317, backward_time=0.361, grad_norm=42.586, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.727e-04, train_time=1.459 +[gpub011:0/16] 2024-02-08 12:39:18,478 (trainer:762) INFO: 34epoch:train:7901-8000batch: iter_time=8.026e-05, forward_time=0.290, loss_ctc=45.261, loss_att=42.184, acc=0.753, loss=43.107, backward_time=0.295, grad_norm=40.197, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.727e-04, train_time=1.204 +[gpub011:0/16] 2024-02-08 12:41:31,814 (trainer:762) INFO: 34epoch:train:8001-8100batch: iter_time=8.189e-05, forward_time=0.290, loss_ctc=45.173, loss_att=46.200, acc=0.760, loss=45.892, backward_time=0.295, grad_norm=41.039, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.727e-04, train_time=1.333 +[gpub011:0/16] 2024-02-08 12:43:33,659 (trainer:762) INFO: 34epoch:train:8101-8200batch: iter_time=8.882e-05, forward_time=0.295, loss_ctc=43.891, loss_att=46.248, acc=0.768, loss=45.541, backward_time=0.306, grad_norm=35.134, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.727e-04, train_time=1.219 +[gpub011:0/16] 2024-02-08 12:46:03,069 (trainer:762) INFO: 34epoch:train:8201-8300batch: iter_time=8.123e-05, forward_time=0.455, loss_ctc=36.305, loss_att=34.317, acc=0.793, loss=34.913, backward_time=0.316, grad_norm=31.965, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.112, optim0_lr0=1.727e-04, train_time=1.492 +[gpub011:0/16] 2024-02-08 12:48:20,731 (trainer:762) INFO: 34epoch:train:8301-8400batch: iter_time=8.828e-05, forward_time=0.290, loss_ctc=44.979, loss_att=41.215, acc=0.760, loss=42.344, backward_time=0.294, grad_norm=41.358, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.726e-04, train_time=1.378 +[gpub011:0/16] 2024-02-08 12:50:14,570 (trainer:762) INFO: 34epoch:train:8401-8500batch: 
iter_time=8.443e-05, forward_time=0.293, loss_ctc=50.671, loss_att=49.349, acc=0.756, loss=49.746, backward_time=0.301, grad_norm=43.866, clip=100.000, loss_scale=7.737e+33, optim_step_time=0.093, optim0_lr0=1.726e-04, train_time=1.138 +[gpub011:0/16] 2024-02-08 12:52:50,998 (trainer:762) INFO: 34epoch:train:8501-8600batch: iter_time=8.642e-05, forward_time=0.374, loss_ctc=43.771, loss_att=39.454, acc=0.769, loss=40.749, backward_time=0.336, grad_norm=42.040, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.726e-04, train_time=1.564 +[gpub011:0/16] 2024-02-08 12:55:07,777 (trainer:762) INFO: 34epoch:train:8601-8700batch: iter_time=8.426e-05, forward_time=0.353, loss_ctc=45.172, loss_att=39.213, acc=0.756, loss=41.001, backward_time=0.311, grad_norm=39.664, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.726e-04, train_time=1.366 +[gpub011:0/16] 2024-02-08 12:56:24,268 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub011:0/16] 2024-02-08 12:56:43,624 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 12:56:47,208 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 12:56:47,208 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-08 12:56:47,216 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 13:04:07,914 
(trainer:762) INFO: 34epoch:train:8701-8800batch: iter_time=4.111, forward_time=0.396, loss_ctc=47.119, loss_att=47.813, acc=0.755, loss=47.605, backward_time=0.320, grad_norm=39.391, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.726e-04, train_time=5.403 +[gpub011:0/16] 2024-02-08 13:06:13,758 (trainer:762) INFO: 34epoch:train:8801-8900batch: iter_time=8.161e-05, forward_time=0.292, loss_ctc=50.589, loss_att=49.360, acc=0.764, loss=49.729, backward_time=0.295, grad_norm=45.335, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.726e-04, train_time=1.258 +[gpub011:0/16] 2024-02-08 13:08:28,265 (trainer:762) INFO: 34epoch:train:8901-9000batch: iter_time=8.013e-05, forward_time=0.289, loss_ctc=43.084, loss_att=39.633, acc=0.761, loss=40.668, backward_time=0.293, grad_norm=34.655, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.725e-04, train_time=1.344 +[gpub011:0/16] 2024-02-08 13:11:01,825 (trainer:762) INFO: 34epoch:train:9001-9100batch: iter_time=8.156e-05, forward_time=0.472, loss_ctc=41.984, loss_att=37.884, acc=0.774, loss=39.114, backward_time=0.356, grad_norm=38.814, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.725e-04, train_time=1.537 +[gpub011:0/16] 2024-02-08 13:13:31,145 (trainer:762) INFO: 34epoch:train:9101-9200batch: iter_time=8.418e-05, forward_time=0.300, loss_ctc=48.961, loss_att=51.279, acc=0.758, loss=50.583, backward_time=0.298, grad_norm=41.331, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.725e-04, train_time=1.493 +[gpub011:0/16] 2024-02-08 13:15:43,205 (trainer:762) INFO: 34epoch:train:9201-9300batch: iter_time=8.131e-05, forward_time=0.288, loss_ctc=45.416, loss_att=49.206, acc=0.740, loss=48.069, backward_time=0.294, grad_norm=44.645, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.725e-04, train_time=1.319 +[gpub011:0/16] 2024-02-08 13:16:45,060 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub011:0/16] 2024-02-08 13:18:08,613 (trainer:762) INFO: 34epoch:train:9301-9400batch: iter_time=8.272e-05, forward_time=0.373, loss_ctc=42.103, loss_att=42.667, acc=0.793, loss=42.498, backward_time=0.376, grad_norm=37.564, clip=100.000, loss_scale=7.133e+33, optim_step_time=0.099, optim0_lr0=1.725e-04, train_time=1.455 +[gpub011:0/16] 2024-02-08 13:20:39,104 (trainer:762) INFO: 34epoch:train:9401-9500batch: iter_time=8.229e-05, forward_time=0.290, loss_ctc=40.751, loss_att=37.910, acc=0.796, loss=38.762, backward_time=0.300, grad_norm=33.788, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.725e-04, train_time=1.505 +[gpub011:0/16] 2024-02-08 13:22:45,459 (trainer:762) INFO: 34epoch:train:9501-9600batch: iter_time=7.820e-05, forward_time=0.315, loss_ctc=40.687, loss_att=39.985, acc=0.773, loss=40.195, backward_time=0.293, grad_norm=38.751, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.724e-04, train_time=1.263 +[gpub011:0/16] 2024-02-08 13:25:14,210 (trainer:762) INFO: 34epoch:train:9601-9700batch: iter_time=8.172e-05, forward_time=0.446, loss_ctc=48.165, loss_att=49.612, acc=0.755, loss=49.178, backward_time=0.326, grad_norm=41.089, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.724e-04, train_time=1.487 +[gpub011:0/16] 2024-02-08 13:27:36,182 (trainer:762) INFO: 34epoch:train:9701-9800batch: iter_time=8.453e-05, forward_time=0.292, loss_ctc=47.715, loss_att=46.102, acc=0.761, loss=46.586, backward_time=0.300, grad_norm=61.500, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.724e-04, train_time=1.420 +[gpub011:0/16] 2024-02-08 13:29:56,705 (trainer:762) INFO: 34epoch:train:9801-9900batch: iter_time=8.318e-05, forward_time=0.289, loss_ctc=41.907, loss_att=37.019, acc=0.780, loss=38.486, backward_time=0.295, grad_norm=36.041, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.724e-04, train_time=1.405 
+[gpub011:0/16] 2024-02-08 13:32:07,700 (trainer:762) INFO: 34epoch:train:9901-10000batch: iter_time=8.204e-05, forward_time=0.410, loss_ctc=43.771, loss_att=40.146, acc=0.769, loss=41.234, backward_time=0.314, grad_norm=38.050, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.724e-04, train_time=1.310 +[gpub011:0/16] 2024-02-08 13:32:27,729 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub011:0/16] 2024-02-08 13:32:46,873 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 13:32:50,463 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 13:32:50,463 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-08 13:32:50,466 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 13:40:20,880 (trainer:762) INFO: 34epoch:train:10001-10100batch: iter_time=3.610, forward_time=0.297, loss_ctc=54.128, loss_att=54.992, acc=0.739, loss=54.733, backward_time=0.298, grad_norm=49.218, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.724e-04, train_time=4.931 +[gpub011:0/16] 2024-02-08 13:42:25,755 (trainer:762) INFO: 34epoch:train:10101-10200batch: iter_time=8.427e-05, forward_time=0.291, loss_ctc=49.717, loss_att=45.107, acc=0.762, loss=46.490, backward_time=0.297, grad_norm=40.664, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.093, optim0_lr0=1.723e-04, train_time=1.249 +[gpub011:0/16] 2024-02-08 13:44:50,838 (trainer:762) INFO: 34epoch:train:10201-10300batch: iter_time=9.126e-05, forward_time=0.399, loss_ctc=37.502, loss_att=34.396, acc=0.778, loss=35.328, backward_time=0.336, grad_norm=35.053, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.723e-04, train_time=1.450 +[gpub011:0/16] 2024-02-08 13:47:01,911 (trainer:762) INFO: 34epoch:train:10301-10400batch: iter_time=1.022e-04, forward_time=0.302, loss_ctc=46.535, loss_att=49.578, acc=0.756, loss=48.665, backward_time=0.299, grad_norm=40.688, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.723e-04, train_time=1.310 +[gpub011:0/16] 2024-02-08 13:48:57,467 (trainer:762) INFO: 34epoch:train:10401-10500batch: iter_time=8.622e-05, forward_time=0.313, loss_ctc=45.222, loss_att=42.366, acc=0.761, loss=43.223, backward_time=0.297, grad_norm=40.045, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.723e-04, train_time=1.155 +[gpub011:0/16] 2024-02-08 13:51:24,796 (trainer:762) INFO: 34epoch:train:10501-10600batch: iter_time=8.837e-05, forward_time=0.304, loss_ctc=45.001, loss_att=47.601, acc=0.764, loss=46.821, backward_time=0.313, grad_norm=41.786, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.723e-04, train_time=1.473 +[gpub011:0/16] 2024-02-08 13:53:50,045 (trainer:762) INFO: 34epoch:train:10601-10700batch: iter_time=8.837e-05, forward_time=0.396, loss_ctc=44.051, loss_att=45.033, acc=0.789, loss=44.738, backward_time=0.316, grad_norm=34.939, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.723e-04, train_time=1.452 +[gpub011:0/16] 2024-02-08 13:55:42,060 (trainer:762) INFO: 34epoch:train:10701-10800batch: iter_time=9.164e-05, forward_time=0.289, loss_ctc=36.511, loss_att=34.299, acc=0.798, loss=34.963, backward_time=0.294, grad_norm=32.600, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, 
optim0_lr0=1.722e-04, train_time=1.120 +[gpub011:0/16] 2024-02-08 13:58:23,321 (trainer:762) INFO: 34epoch:train:10801-10900batch: iter_time=9.180e-05, forward_time=0.289, loss_ctc=44.186, loss_att=40.270, acc=0.771, loss=41.445, backward_time=0.293, grad_norm=39.241, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.722e-04, train_time=1.612 +[gpub011:0/16] 2024-02-08 14:00:43,176 (trainer:762) INFO: 34epoch:train:10901-11000batch: iter_time=8.566e-05, forward_time=0.364, loss_ctc=50.329, loss_att=53.192, acc=0.748, loss=52.333, backward_time=0.340, grad_norm=44.407, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.722e-04, train_time=1.399 +[gpub011:0/16] 2024-02-08 14:03:45,518 (trainer:762) INFO: 34epoch:train:11001-11100batch: iter_time=0.075, forward_time=0.290, loss_ctc=43.879, loss_att=39.213, acc=0.775, loss=40.613, backward_time=0.298, grad_norm=40.178, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.722e-04, train_time=1.822 +[gpub011:0/16] 2024-02-08 14:05:53,764 (trainer:762) INFO: 34epoch:train:11101-11200batch: iter_time=9.086e-05, forward_time=0.292, loss_ctc=44.895, loss_att=38.957, acc=0.769, loss=40.739, backward_time=0.294, grad_norm=39.276, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.722e-04, train_time=1.283 +[gpub011:0/16] 2024-02-08 14:07:31,611 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-08 14:07:51,578 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 14:07:55,382 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 14:07:55,382 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-08 14:07:55,385 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 14:14:34,160 (trainer:762) INFO: 34epoch:train:11201-11300batch: iter_time=3.761, forward_time=0.373, loss_ctc=46.600, loss_att=46.699, acc=0.760, loss=46.669, backward_time=0.311, grad_norm=37.770, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.722e-04, train_time=5.204 +[gpub011:0/16] 2024-02-08 14:16:22,477 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-08 14:16:30,150 (trainer:762) INFO: 34epoch:train:11301-11400batch: iter_time=7.890e-05, forward_time=0.293, loss_ctc=50.368, loss_att=47.943, acc=0.766, loss=48.671, backward_time=0.296, grad_norm=44.266, clip=100.000, loss_scale=8.024e+33, optim_step_time=0.093, optim0_lr0=1.721e-04, train_time=1.159 +[gpub011:0/16] 2024-02-08 14:18:37,889 (trainer:762) INFO: 34epoch:train:11401-11500batch: iter_time=8.058e-05, forward_time=0.289, loss_ctc=43.023, loss_att=39.534, acc=0.761, loss=40.581, backward_time=0.293, grad_norm=36.905, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.721e-04, train_time=1.277 +[gpub011:0/16] 2024-02-08 14:21:19,385 (trainer:762) INFO: 34epoch:train:11501-11600batch: iter_time=4.459e-04, forward_time=0.381, loss_ctc=41.958, loss_att=37.329, acc=0.775, loss=38.717, backward_time=0.341, grad_norm=37.450, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.721e-04, train_time=1.614 +[gpub011:0/16] 2024-02-08 14:23:24,228 (trainer:762) INFO: 34epoch:train:11601-11700batch: iter_time=8.122e-05, forward_time=0.292, loss_ctc=49.020, loss_att=50.653, acc=0.759, loss=50.163, backward_time=0.299, grad_norm=39.591, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.721e-04, train_time=1.249 +[gpub011:0/16] 2024-02-08 14:25:45,933 (trainer:762) INFO: 34epoch:train:11701-11800batch: iter_time=8.115e-05, forward_time=0.289, loss_ctc=45.060, loss_att=48.626, acc=0.743, loss=47.556, backward_time=0.293, grad_norm=41.161, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.721e-04, train_time=1.416 +[gpub011:0/16] 2024-02-08 14:27:52,638 (trainer:762) INFO: 34epoch:train:11801-11900batch: iter_time=8.272e-05, forward_time=0.293, loss_ctc=41.960, loss_att=42.336, acc=0.793, loss=42.223, backward_time=0.299, grad_norm=37.516, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.721e-04, train_time=1.267 +[gpub011:0/16] 2024-02-08 
14:30:04,718 (trainer:762) INFO: 34epoch:train:11901-12000batch: iter_time=8.013e-05, forward_time=0.386, loss_ctc=40.395, loss_att=37.472, acc=0.797, loss=38.349, backward_time=0.322, grad_norm=33.040, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.720e-04, train_time=1.319 +[gpub011:0/16] 2024-02-08 14:32:20,776 (trainer:762) INFO: 34epoch:train:12001-12100batch: iter_time=8.022e-05, forward_time=0.289, loss_ctc=40.373, loss_att=39.811, acc=0.773, loss=39.979, backward_time=0.292, grad_norm=39.927, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.720e-04, train_time=1.362 +[gpub011:0/16] 2024-02-08 14:33:43,825 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-08 14:34:16,248 (trainer:762) INFO: 34epoch:train:12101-12200batch: iter_time=8.041e-05, forward_time=0.293, loss_ctc=48.024, loss_att=49.126, acc=0.755, loss=48.795, backward_time=0.298, grad_norm=39.669, clip=100.000, loss_scale=4.484e+33, optim_step_time=0.093, optim0_lr0=1.720e-04, train_time=1.154 +[gpub011:0/16] 2024-02-08 14:36:43,854 (trainer:762) INFO: 34epoch:train:12201-12300batch: iter_time=8.022e-05, forward_time=0.403, loss_ctc=47.858, loss_att=45.654, acc=0.762, loss=46.315, backward_time=0.317, grad_norm=45.957, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.720e-04, train_time=1.476 +[gpub011:0/16] 2024-02-08 14:38:46,576 (trainer:762) INFO: 34epoch:train:12301-12400batch: iter_time=8.037e-05, forward_time=0.289, loss_ctc=41.823, loss_att=36.946, acc=0.781, loss=38.409, backward_time=0.296, grad_norm=38.021, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.720e-04, train_time=1.226 +[gpub011:0/16] 2024-02-08 14:40:45,108 (trainer:762) INFO: 34epoch:train:12401-12500batch: iter_time=7.929e-05, forward_time=0.289, loss_ctc=43.733, loss_att=40.115, acc=0.770, loss=41.200, backward_time=0.295, grad_norm=39.057, clip=100.000, loss_scale=2.596e+33, 
optim_step_time=0.092, optim0_lr0=1.719e-04, train_time=1.186 +[gpub011:0/16] 2024-02-08 14:41:05,137 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub011:0/16] 2024-02-08 14:41:24,632 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 14:41:28,183 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 14:41:28,183 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-08 14:41:28,186 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 14:48:23,696 (trainer:762) INFO: 34epoch:train:12501-12600batch: iter_time=3.173, forward_time=0.383, loss_ctc=54.116, loss_att=56.584, acc=0.734, loss=55.844, backward_time=0.347, grad_norm=49.111, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.719e-04, train_time=4.586 +[gpub011:0/16] 2024-02-08 14:50:34,704 (trainer:762) INFO: 34epoch:train:12601-12700batch: iter_time=8.263e-05, forward_time=0.290, loss_ctc=49.487, loss_att=44.902, acc=0.754, loss=46.277, backward_time=0.294, grad_norm=39.475, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.719e-04, train_time=1.309 +[gpub011:0/16] 2024-02-08 14:53:01,377 (trainer:762) INFO: 34epoch:train:12701-12800batch: iter_time=8.161e-05, forward_time=0.286, loss_ctc=37.968, loss_att=34.868, acc=0.770, loss=35.798, backward_time=0.290, 
grad_norm=34.859, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.719e-04, train_time=1.467 +[gpub011:0/16] 2024-02-08 14:55:13,904 (trainer:762) INFO: 34epoch:train:12801-12900batch: iter_time=1.678e-04, forward_time=0.390, loss_ctc=46.662, loss_att=49.466, acc=0.750, loss=48.625, backward_time=0.338, grad_norm=41.372, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.719e-04, train_time=1.325 +[gpub011:0/16] 2024-02-08 14:57:33,754 (trainer:762) INFO: 34epoch:train:12901-13000batch: iter_time=8.368e-05, forward_time=0.288, loss_ctc=44.807, loss_att=42.359, acc=0.755, loss=43.093, backward_time=0.293, grad_norm=40.918, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.719e-04, train_time=1.397 +[gpub011:0/16] 2024-02-08 14:59:43,402 (trainer:762) INFO: 34epoch:train:13001-13100batch: iter_time=8.286e-05, forward_time=0.290, loss_ctc=44.878, loss_att=46.352, acc=0.761, loss=45.910, backward_time=0.296, grad_norm=40.584, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.718e-04, train_time=1.298 +[gpub011:0/16] 2024-02-08 15:02:20,603 (trainer:762) INFO: 34epoch:train:13101-13200batch: iter_time=8.345e-05, forward_time=0.403, loss_ctc=43.919, loss_att=46.072, acc=0.769, loss=45.426, backward_time=0.319, grad_norm=37.121, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.718e-04, train_time=1.572 +[gpub011:0/16] 2024-02-08 15:04:09,515 (trainer:762) INFO: 34epoch:train:13201-13300batch: iter_time=8.286e-05, forward_time=0.288, loss_ctc=36.974, loss_att=35.030, acc=0.790, loss=35.614, backward_time=0.293, grad_norm=32.753, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.718e-04, train_time=1.088 +[gpub011:0/16] 2024-02-08 15:06:28,111 (trainer:762) INFO: 34epoch:train:13301-13400batch: iter_time=8.108e-05, forward_time=0.296, loss_ctc=44.316, loss_att=40.885, acc=0.764, loss=41.914, backward_time=0.295, grad_norm=41.463, clip=100.000, 
loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.718e-04, train_time=1.387 +[gpub011:0/16] 2024-02-08 15:08:53,332 (trainer:762) INFO: 34epoch:train:13401-13500batch: iter_time=8.241e-05, forward_time=0.377, loss_ctc=50.027, loss_att=49.823, acc=0.754, loss=49.884, backward_time=0.340, grad_norm=43.189, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.718e-04, train_time=1.452 +[gpub011:0/16] 2024-02-08 15:10:47,119 (trainer:762) INFO: 34epoch:train:13501-13600batch: iter_time=8.497e-05, forward_time=0.289, loss_ctc=43.551, loss_att=38.563, acc=0.774, loss=40.060, backward_time=0.295, grad_norm=41.093, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.718e-04, train_time=1.137 +[gpub011:0/16] 2024-02-08 15:13:15,978 (trainer:762) INFO: 34epoch:train:13601-13700batch: iter_time=8.153e-05, forward_time=0.288, loss_ctc=45.023, loss_att=39.442, acc=0.756, loss=41.116, backward_time=0.292, grad_norm=40.864, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.717e-04, train_time=1.488 +[gpub011:0/16] 2024-02-08 15:14:49,565 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-08 15:15:08,998 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 15:15:12,531 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 15:15:12,531 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-08 15:15:12,536 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 15:21:21,934 (trainer:762) INFO: 34epoch:train:13701-13800batch: iter_time=3.446, forward_time=0.444, loss_ctc=46.843, loss_att=47.520, acc=0.753, loss=47.317, backward_time=0.324, grad_norm=40.184, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.717e-04, train_time=4.859 +[gpub011:0/16] 2024-02-08 15:23:39,517 (trainer:762) INFO: 34epoch:train:13801-13900batch: iter_time=7.559e-05, forward_time=0.289, loss_ctc=50.395, loss_att=46.170, acc=0.763, loss=47.438, backward_time=0.295, grad_norm=43.832, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.717e-04, train_time=1.376 +[gpub011:0/16] 2024-02-08 15:25:52,825 (trainer:762) INFO: 34epoch:train:13901-14000batch: iter_time=8.005e-05, forward_time=0.286, loss_ctc=43.325, loss_att=38.823, acc=0.759, loss=40.174, backward_time=0.296, grad_norm=35.443, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.717e-04, train_time=1.332 +[gpub011:0/16] 2024-02-08 15:28:00,782 (trainer:762) 
INFO: 34epoch:train:14001-14100batch: iter_time=8.083e-05, forward_time=0.419, loss_ctc=41.458, loss_att=37.113, acc=0.767, loss=38.416, backward_time=0.330, grad_norm=36.362, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.717e-04, train_time=1.280 +[gpub011:0/16] 2024-02-08 15:30:19,896 (trainer:762) INFO: 34epoch:train:14101-14200batch: iter_time=8.268e-05, forward_time=0.292, loss_ctc=48.679, loss_att=49.798, acc=0.753, loss=49.462, backward_time=0.297, grad_norm=42.026, clip=100.000, loss_scale=3.297e+33, optim_step_time=0.093, optim0_lr0=1.717e-04, train_time=1.391 +[gpub011:0/16] 2024-02-08 15:32:25,020 (trainer:762) INFO: 34epoch:train:14201-14300batch: iter_time=8.123e-05, forward_time=0.289, loss_ctc=45.352, loss_att=47.549, acc=0.738, loss=46.890, backward_time=0.295, grad_norm=42.753, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.716e-04, train_time=1.251 +[gpub011:0/16] 2024-02-08 15:34:34,700 (trainer:762) INFO: 34epoch:train:14301-14400batch: iter_time=7.951e-05, forward_time=0.405, loss_ctc=41.833, loss_att=42.378, acc=0.787, loss=42.215, backward_time=0.341, grad_norm=37.272, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.716e-04, train_time=1.296 +[gpub011:0/16] 2024-02-08 15:36:51,247 (trainer:762) INFO: 34epoch:train:14401-14500batch: iter_time=8.359e-05, forward_time=0.289, loss_ctc=41.011, loss_att=38.698, acc=0.780, loss=39.392, backward_time=0.294, grad_norm=35.438, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.716e-04, train_time=1.365 +[gpub011:0/16] 2024-02-08 15:38:58,010 (trainer:762) INFO: 34epoch:train:14501-14600batch: iter_time=8.340e-05, forward_time=0.288, loss_ctc=39.848, loss_att=40.033, acc=0.768, loss=39.978, backward_time=0.294, grad_norm=39.966, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.716e-04, train_time=1.268 +[gpub011:0/16] 2024-02-08 15:40:59,507 (trainer:762) INFO: 
34epoch:train:14601-14700batch: iter_time=8.194e-05, forward_time=0.339, loss_ctc=47.812, loss_att=47.056, acc=0.755, loss=47.283, backward_time=0.301, grad_norm=39.182, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.716e-04, train_time=1.215 +[gpub011:0/16] 2024-02-08 15:43:19,589 (trainer:762) INFO: 34epoch:train:14701-14800batch: iter_time=8.567e-05, forward_time=0.338, loss_ctc=47.953, loss_att=43.489, acc=0.767, loss=44.828, backward_time=0.317, grad_norm=44.785, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.716e-04, train_time=1.401 +[gpub011:0/16] 2024-02-08 15:45:27,954 (trainer:762) INFO: 34epoch:train:14801-14900batch: iter_time=8.139e-05, forward_time=0.291, loss_ctc=41.749, loss_att=36.820, acc=0.775, loss=38.299, backward_time=0.296, grad_norm=37.113, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.715e-04, train_time=1.283 +[gpub011:0/16] 2024-02-08 15:47:40,200 (trainer:762) INFO: 34epoch:train:14901-15000batch: iter_time=2.338e-04, forward_time=0.319, loss_ctc=43.271, loss_att=40.169, acc=0.761, loss=41.100, backward_time=0.300, grad_norm=37.762, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.715e-04, train_time=1.322 +[gpub011:0/16] 2024-02-08 16:25:55,421 (trainer:361) INFO: 34epoch results: [train] iter_time=0.290, forward_time=0.336, loss_ctc=45.396, loss_att=43.729, acc=0.763, loss=44.229, backward_time=0.312, grad_norm=40.439, clip=100.000, loss_scale=6.019e+33, optim_step_time=0.096, optim0_lr0=1.728e-04, train_time=1.673, time=6 hours, 58 minutes and 46.82 seconds, total_count=540000, gpu_max_cached_mem_GB=43.805, [valid] loss_ctc=35.899, cer_ctc=0.183, loss_att=37.243, acc=0.701, cer=0.285, wer=0.989, loss=36.840, time=37 minutes and 50.89 seconds, total_count=168156, gpu_max_cached_mem_GB=43.805 +[gpub011:0/16] 2024-02-08 16:26:14,011 (trainer:416) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub011:0/16] 2024-02-08 
16:26:14,072 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/29epoch.pth +[gpub011:0/16] 2024-02-08 16:26:14,072 (trainer:290) INFO: 35/45epoch started. Estimated time to finish: 3 days, 11 hours and 46 minutes +[gpub011:0/16] 2024-02-08 16:26:14,083 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-08 16:26:32,006 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 16:26:35,447 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 16:26:35,447 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-08 16:26:35,451 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 16:33:14,995 (trainer:762) INFO: 35epoch:train:1-100batch: iter_time=2.937, forward_time=0.374, loss_ctc=52.199, loss_att=54.027, acc=0.750, loss=53.479, backward_time=0.308, grad_norm=45.891, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.715e-04, train_time=4.208 +[gpub011:0/16] 2024-02-08 16:35:21,607 (trainer:762) INFO: 35epoch:train:101-200batch: iter_time=8.114e-05, forward_time=0.330, loss_ctc=47.202, loss_att=42.071, acc=0.775, loss=43.610, backward_time=0.341, grad_norm=43.197, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.715e-04, 
train_time=1.267 +[gpub011:0/16] 2024-02-08 16:37:24,935 (trainer:762) INFO: 35epoch:train:201-300batch: iter_time=8.363e-05, forward_time=0.333, loss_ctc=46.495, loss_att=50.000, acc=0.745, loss=48.949, backward_time=0.319, grad_norm=40.618, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.715e-04, train_time=1.233 +[gpub011:0/16] 2024-02-08 16:39:53,081 (trainer:762) INFO: 35epoch:train:301-400batch: iter_time=8.480e-05, forward_time=0.317, loss_ctc=43.986, loss_att=42.491, acc=0.767, loss=42.940, backward_time=0.308, grad_norm=38.976, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.715e-04, train_time=1.481 +[gpub011:0/16] 2024-02-08 16:42:00,008 (trainer:762) INFO: 35epoch:train:401-500batch: iter_time=2.459e-04, forward_time=0.345, loss_ctc=56.322, loss_att=55.749, acc=0.755, loss=55.921, backward_time=0.331, grad_norm=47.730, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.714e-04, train_time=1.269 +[gpub011:0/16] 2024-02-08 16:43:53,356 (trainer:762) INFO: 35epoch:train:501-600batch: iter_time=8.159e-05, forward_time=0.296, loss_ctc=46.042, loss_att=47.758, acc=0.760, loss=47.243, backward_time=0.301, grad_norm=41.242, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.714e-04, train_time=1.133 +[gpub011:0/16] 2024-02-08 16:46:20,788 (trainer:762) INFO: 35epoch:train:601-700batch: iter_time=8.567e-04, forward_time=0.350, loss_ctc=44.055, loss_att=41.378, acc=0.782, loss=42.181, backward_time=0.329, grad_norm=36.641, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.714e-04, train_time=1.474 +[gpub011:0/16] 2024-02-08 16:48:30,531 (trainer:762) INFO: 35epoch:train:701-800batch: iter_time=8.222e-05, forward_time=0.303, loss_ctc=46.808, loss_att=50.666, acc=0.744, loss=49.509, backward_time=0.303, grad_norm=44.429, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.714e-04, train_time=1.298 +[gpub011:0/16] 2024-02-08 
16:50:27,119 (trainer:762) INFO: 35epoch:train:801-900batch: iter_time=7.913e-05, forward_time=0.339, loss_ctc=39.056, loss_att=41.609, acc=0.737, loss=40.843, backward_time=0.305, grad_norm=36.849, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.714e-04, train_time=1.166 +[gpub011:0/16] 2024-02-08 16:52:37,160 (trainer:762) INFO: 35epoch:train:901-1000batch: iter_time=3.822e-04, forward_time=0.369, loss_ctc=46.698, loss_att=45.589, acc=0.763, loss=45.922, backward_time=0.319, grad_norm=37.717, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.714e-04, train_time=1.300 +[gpub011:0/16] 2024-02-08 16:54:48,544 (trainer:762) INFO: 35epoch:train:1001-1100batch: iter_time=8.385e-05, forward_time=0.297, loss_ctc=43.837, loss_att=41.572, acc=0.754, loss=42.252, backward_time=0.297, grad_norm=44.640, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.713e-04, train_time=1.314 +[gpub011:0/16] 2024-02-08 16:57:08,340 (trainer:762) INFO: 35epoch:train:1101-1200batch: iter_time=4.537e-04, forward_time=0.375, loss_ctc=48.861, loss_att=46.300, acc=0.760, loss=47.069, backward_time=0.340, grad_norm=43.203, clip=100.000, loss_scale=6.594e+33, optim_step_time=0.099, optim0_lr0=1.713e-04, train_time=1.398 +[gpub011:0/16] 2024-02-08 16:58:24,745 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-08 16:58:43,866 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 16:58:47,492 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 16:58:47,492 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-08 16:58:47,495 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 17:05:12,167 (trainer:762) INFO: 35epoch:train:1201-1300batch: iter_time=3.559, forward_time=0.298, loss_ctc=48.965, loss_att=52.243, acc=0.759, loss=51.260, backward_time=0.301, grad_norm=41.055, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.713e-04, train_time=4.838 +[gpub011:0/16] 2024-02-08 17:07:10,316 (trainer:762) INFO: 35epoch:train:1301-1400batch: iter_time=8.335e-05, forward_time=0.297, loss_ctc=49.422, loss_att=48.766, acc=0.764, loss=48.963, backward_time=0.301, grad_norm=39.116, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.713e-04, train_time=1.181 +[gpub011:0/16] 2024-02-08 17:09:26,491 (trainer:762) INFO: 35epoch:train:1401-1500batch: iter_time=8.500e-05, forward_time=0.400, loss_ctc=46.273, loss_att=37.764, acc=0.770, loss=40.317, backward_time=0.331, grad_norm=95.939, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.713e-04, train_time=1.361 +[gpub011:0/16] 2024-02-08 17:11:46,362 (trainer:762) INFO: 
35epoch:train:1501-1600batch: iter_time=8.315e-05, forward_time=0.332, loss_ctc=45.324, loss_att=50.452, acc=0.757, loss=48.914, backward_time=0.303, grad_norm=38.388, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.713e-04, train_time=1.397 +[gpub011:0/16] 2024-02-08 17:13:45,747 (trainer:762) INFO: 35epoch:train:1601-1700batch: iter_time=8.328e-05, forward_time=0.292, loss_ctc=48.989, loss_att=51.962, acc=0.748, loss=51.070, backward_time=0.299, grad_norm=44.229, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.712e-04, train_time=1.195 +[gpub011:0/16] 2024-02-08 17:15:57,765 (trainer:762) INFO: 35epoch:train:1701-1800batch: iter_time=8.480e-05, forward_time=0.319, loss_ctc=48.875, loss_att=49.748, acc=0.778, loss=49.486, backward_time=0.316, grad_norm=38.019, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.712e-04, train_time=1.320 +[gpub011:0/16] 2024-02-08 17:18:10,188 (trainer:762) INFO: 35epoch:train:1801-1900batch: iter_time=8.834e-05, forward_time=0.382, loss_ctc=43.372, loss_att=42.695, acc=0.774, loss=42.898, backward_time=0.319, grad_norm=39.602, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.712e-04, train_time=1.324 +[gpub011:0/16] 2024-02-08 17:20:21,579 (trainer:762) INFO: 35epoch:train:1901-2000batch: iter_time=8.044e-05, forward_time=0.290, loss_ctc=46.071, loss_att=42.234, acc=0.765, loss=43.385, backward_time=0.295, grad_norm=39.313, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.712e-04, train_time=1.313 +[gpub011:0/16] 2024-02-08 17:22:18,478 (trainer:762) INFO: 35epoch:train:2001-2100batch: iter_time=8.390e-05, forward_time=0.297, loss_ctc=43.134, loss_att=51.213, acc=0.731, loss=48.789, backward_time=0.301, grad_norm=38.940, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.712e-04, train_time=1.170 +[gpub011:0/16] 2024-02-08 17:24:36,344 (trainer:762) INFO: 35epoch:train:2101-2200batch: 
iter_time=8.788e-05, forward_time=0.322, loss_ctc=40.663, loss_att=42.242, acc=0.751, loss=41.768, backward_time=0.321, grad_norm=38.043, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.712e-04, train_time=1.378 +[gpub011:0/16] 2024-02-08 17:25:30,839 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-08 17:27:02,300 (trainer:762) INFO: 35epoch:train:2201-2300batch: iter_time=1.320e-04, forward_time=0.380, loss_ctc=43.434, loss_att=39.049, acc=0.765, loss=40.365, backward_time=0.305, grad_norm=40.042, clip=100.000, loss_scale=7.080e+33, optim_step_time=0.095, optim0_lr0=1.711e-04, train_time=1.459 +[gpub011:0/16] 2024-02-08 17:29:03,321 (trainer:762) INFO: 35epoch:train:2301-2400batch: iter_time=8.387e-05, forward_time=0.291, loss_ctc=44.102, loss_att=41.233, acc=0.775, loss=42.094, backward_time=0.296, grad_norm=40.502, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.711e-04, train_time=1.209 +[gpub011:0/16] 2024-02-08 17:31:20,859 (trainer:762) INFO: 35epoch:train:2401-2500batch: iter_time=7.975e-05, forward_time=0.302, loss_ctc=51.943, loss_att=54.736, acc=0.745, loss=53.898, backward_time=0.316, grad_norm=48.720, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.711e-04, train_time=1.377 +[gpub011:0/16] 2024-02-08 17:31:40,916 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-08 17:31:59,763 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 17:32:03,292 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 17:32:03,292 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-08 17:32:03,295 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 17:39:32,486 (trainer:762) INFO: 35epoch:train:2501-2600batch: iter_time=3.575, forward_time=0.383, loss_ctc=50.139, loss_att=50.854, acc=0.759, loss=50.639, backward_time=0.313, grad_norm=41.095, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.711e-04, train_time=4.916 +[gpub011:0/16] 2024-02-08 17:41:32,116 (trainer:762) INFO: 35epoch:train:2601-2700batch: iter_time=8.029e-05, forward_time=0.290, loss_ctc=46.325, loss_att=41.124, acc=0.779, loss=42.685, backward_time=0.296, grad_norm=42.586, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.711e-04, train_time=1.195 +[gpub011:0/16] 2024-02-08 17:43:52,793 (trainer:762) INFO: 35epoch:train:2701-2800batch: iter_time=8.049e-05, forward_time=0.308, loss_ctc=45.476, loss_att=48.284, acc=0.752, loss=47.441, backward_time=0.319, grad_norm=40.880, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.711e-04, train_time=1.408 +[gpub011:0/16] 2024-02-08 17:46:02,904 (trainer:762) INFO: 
35epoch:train:2801-2900batch: iter_time=9.127e-05, forward_time=0.363, loss_ctc=43.619, loss_att=42.446, acc=0.770, loss=42.798, backward_time=0.307, grad_norm=38.418, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.710e-04, train_time=1.301 +[gpub011:0/16] 2024-02-08 17:48:14,859 (trainer:762) INFO: 35epoch:train:2901-3000batch: iter_time=8.487e-05, forward_time=0.294, loss_ctc=53.133, loss_att=55.330, acc=0.758, loss=54.671, backward_time=0.299, grad_norm=44.340, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.710e-04, train_time=1.319 +[gpub011:0/16] 2024-02-08 17:50:22,774 (trainer:762) INFO: 35epoch:train:3001-3100batch: iter_time=8.263e-05, forward_time=0.337, loss_ctc=44.930, loss_att=46.663, acc=0.767, loss=46.143, backward_time=0.329, grad_norm=41.407, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.710e-04, train_time=1.278 +[gpub011:0/16] 2024-02-08 17:52:34,000 (trainer:762) INFO: 35epoch:train:3101-3200batch: iter_time=9.230e-05, forward_time=0.354, loss_ctc=43.908, loss_att=40.863, acc=0.784, loss=41.776, backward_time=0.313, grad_norm=35.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.710e-04, train_time=1.314 +[gpub011:0/16] 2024-02-08 17:54:50,430 (trainer:762) INFO: 35epoch:train:3201-3300batch: iter_time=8.369e-05, forward_time=0.292, loss_ctc=46.005, loss_att=50.265, acc=0.746, loss=48.987, backward_time=0.299, grad_norm=43.439, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.710e-04, train_time=1.364 +[gpub011:0/16] 2024-02-08 17:57:05,040 (trainer:762) INFO: 35epoch:train:3301-3400batch: iter_time=7.984e-05, forward_time=0.414, loss_ctc=38.651, loss_att=40.649, acc=0.741, loss=40.050, backward_time=0.312, grad_norm=36.942, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.119, optim0_lr0=1.710e-04, train_time=1.346 +[gpub011:0/16] 2024-02-08 17:59:10,068 (trainer:762) INFO: 35epoch:train:3401-3500batch: 
iter_time=7.879e-05, forward_time=0.292, loss_ctc=46.035, loss_att=45.117, acc=0.768, loss=45.393, backward_time=0.297, grad_norm=37.528, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.709e-04, train_time=1.251 +[gpub011:0/16] 2024-02-08 18:01:17,351 (trainer:762) INFO: 35epoch:train:3501-3600batch: iter_time=7.976e-05, forward_time=0.287, loss_ctc=41.551, loss_att=40.274, acc=0.757, loss=40.657, backward_time=0.294, grad_norm=44.736, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.709e-04, train_time=1.272 +[gpub011:0/16] 2024-02-08 18:03:41,592 (trainer:762) INFO: 35epoch:train:3601-3700batch: iter_time=8.113e-05, forward_time=0.433, loss_ctc=47.286, loss_att=45.590, acc=0.763, loss=46.098, backward_time=0.318, grad_norm=41.661, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.709e-04, train_time=1.442 +[gpub011:0/16] 2024-02-08 18:05:03,845 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub011:0/16] 2024-02-08 18:05:23,141 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 18:05:26,674 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 18:05:26,674 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-08 18:05:26,677 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 18:11:51,122 
(trainer:762) INFO: 35epoch:train:3701-3800batch: iter_time=3.584, forward_time=0.295, loss_ctc=48.234, loss_att=53.429, acc=0.753, loss=51.870, backward_time=0.298, grad_norm=41.268, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.709e-04, train_time=4.895 +[gpub011:0/16] 2024-02-08 18:13:42,827 (trainer:762) INFO: 35epoch:train:3801-3900batch: iter_time=7.839e-05, forward_time=0.290, loss_ctc=49.058, loss_att=50.735, acc=0.753, loss=50.232, backward_time=0.298, grad_norm=41.582, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.709e-04, train_time=1.116 +[gpub011:0/16] 2024-02-08 18:15:57,529 (trainer:762) INFO: 35epoch:train:3901-4000batch: iter_time=7.921e-05, forward_time=0.443, loss_ctc=45.346, loss_att=37.555, acc=0.770, loss=39.892, backward_time=0.313, grad_norm=42.226, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.709e-04, train_time=1.347 +[gpub011:0/16] 2024-02-08 18:18:33,323 (trainer:762) INFO: 35epoch:train:4001-4100batch: iter_time=7.846e-05, forward_time=0.292, loss_ctc=44.924, loss_att=48.963, acc=0.747, loss=47.751, backward_time=0.296, grad_norm=39.556, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.708e-04, train_time=1.558 +[gpub011:0/16] 2024-02-08 18:20:24,789 (trainer:762) INFO: 35epoch:train:4101-4200batch: iter_time=7.726e-05, forward_time=0.292, loss_ctc=48.219, loss_att=50.584, acc=0.741, loss=49.874, backward_time=0.299, grad_norm=46.282, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.708e-04, train_time=1.114 +[gpub011:0/16] 2024-02-08 18:22:42,244 (trainer:762) INFO: 35epoch:train:4201-4300batch: iter_time=7.857e-05, forward_time=0.425, loss_ctc=48.725, loss_att=50.447, acc=0.770, loss=49.931, backward_time=0.341, grad_norm=39.195, clip=100.000, loss_scale=8.463e+33, optim_step_time=0.097, optim0_lr0=1.708e-04, train_time=1.374 +[gpub011:0/16] 2024-02-08 18:25:07,818 (trainer:762) INFO: 
35epoch:train:4301-4400batch: iter_time=7.837e-05, forward_time=0.289, loss_ctc=42.966, loss_att=42.549, acc=0.766, loss=42.674, backward_time=0.295, grad_norm=39.043, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.708e-04, train_time=1.455 +[gpub011:0/16] 2024-02-08 18:27:06,502 (trainer:762) INFO: 35epoch:train:4401-4500batch: iter_time=7.984e-05, forward_time=0.289, loss_ctc=45.580, loss_att=40.855, acc=0.762, loss=42.273, backward_time=0.294, grad_norm=44.341, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.708e-04, train_time=1.187 +[gpub011:0/16] 2024-02-08 18:29:04,625 (trainer:762) INFO: 35epoch:train:4501-4600batch: iter_time=7.953e-05, forward_time=0.321, loss_ctc=42.602, loss_att=49.514, acc=0.730, loss=47.440, backward_time=0.297, grad_norm=37.960, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.708e-04, train_time=1.181 +[gpub011:0/16] 2024-02-08 18:31:53,222 (trainer:762) INFO: 35epoch:train:4601-4700batch: iter_time=8.233e-05, forward_time=0.371, loss_ctc=40.408, loss_att=41.999, acc=0.738, loss=41.522, backward_time=0.348, grad_norm=37.590, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.707e-04, train_time=1.686 +[gpub011:0/16] 2024-02-08 18:33:49,324 (trainer:762) INFO: 35epoch:train:4701-4800batch: iter_time=7.895e-05, forward_time=0.287, loss_ctc=42.501, loss_att=39.387, acc=0.757, loss=40.321, backward_time=0.294, grad_norm=38.000, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.707e-04, train_time=1.160 +[gpub011:0/16] 2024-02-08 18:36:12,163 (trainer:762) INFO: 35epoch:train:4801-4900batch: iter_time=7.938e-05, forward_time=0.290, loss_ctc=43.502, loss_att=39.887, acc=0.773, loss=40.971, backward_time=0.294, grad_norm=37.693, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.707e-04, train_time=1.429 +[gpub011:0/16] 2024-02-08 18:38:14,232 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub011:0/16] 2024-02-08 18:38:15,315 (trainer:762) INFO: 35epoch:train:4901-5000batch: iter_time=7.865e-05, forward_time=0.379, loss_ctc=51.467, loss_att=54.552, acc=0.743, loss=53.627, backward_time=0.330, grad_norm=47.873, clip=100.000, loss_scale=1.033e+34, optim_step_time=0.097, optim0_lr0=1.707e-04, train_time=1.231 +[gpub011:0/16] 2024-02-08 18:38:35,380 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub011:0/16] 2024-02-08 18:38:54,823 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 18:38:58,418 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 18:38:58,418 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-08 18:38:58,423 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 18:46:28,070 (trainer:762) INFO: 35epoch:train:5001-5100batch: iter_time=3.451, forward_time=0.330, loss_ctc=50.569, loss_att=51.223, acc=0.749, loss=51.027, backward_time=0.303, grad_norm=42.476, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.707e-04, train_time=4.927 +[gpub011:0/16] 2024-02-08 18:48:20,726 (trainer:762) INFO: 35epoch:train:5101-5200batch: iter_time=8.608e-05, forward_time=0.289, loss_ctc=45.866, loss_att=40.864, acc=0.774, loss=42.364, backward_time=0.296, grad_norm=41.105, clip=100.000, 
loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.707e-04, train_time=1.126 +[gpub011:0/16] 2024-02-08 18:50:18,959 (trainer:762) INFO: 35epoch:train:5201-5300batch: iter_time=8.365e-05, forward_time=0.304, loss_ctc=44.938, loss_att=48.647, acc=0.742, loss=47.535, backward_time=0.300, grad_norm=40.375, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.706e-04, train_time=1.182 +[gpub011:0/16] 2024-02-08 18:52:55,990 (trainer:762) INFO: 35epoch:train:5301-5400batch: iter_time=8.328e-05, forward_time=0.402, loss_ctc=43.532, loss_att=41.342, acc=0.762, loss=41.999, backward_time=0.317, grad_norm=40.071, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.706e-04, train_time=1.570 +[gpub011:0/16] 2024-02-08 18:54:56,362 (trainer:762) INFO: 35epoch:train:5401-5500batch: iter_time=8.131e-05, forward_time=0.292, loss_ctc=52.406, loss_att=54.709, acc=0.751, loss=54.018, backward_time=0.298, grad_norm=42.069, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.706e-04, train_time=1.203 +[gpub011:0/16] 2024-02-08 18:57:03,482 (trainer:762) INFO: 35epoch:train:5501-5600batch: iter_time=2.317e-04, forward_time=0.297, loss_ctc=44.690, loss_att=46.318, acc=0.758, loss=45.829, backward_time=0.298, grad_norm=42.482, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.706e-04, train_time=1.269 +[gpub011:0/16] 2024-02-08 18:59:36,343 (trainer:762) INFO: 35epoch:train:5601-5700batch: iter_time=8.516e-05, forward_time=0.414, loss_ctc=43.490, loss_att=39.381, acc=0.779, loss=40.613, backward_time=0.329, grad_norm=35.217, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.706e-04, train_time=1.530 +[gpub011:0/16] 2024-02-08 19:01:34,739 (trainer:762) INFO: 35epoch:train:5701-5800batch: iter_time=8.438e-05, forward_time=0.290, loss_ctc=45.381, loss_att=49.720, acc=0.739, loss=48.418, backward_time=0.296, grad_norm=43.436, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.093, optim0_lr0=1.706e-04, train_time=1.184 +[gpub011:0/16] 2024-02-08 19:03:54,885 (trainer:762) INFO: 35epoch:train:5801-5900batch: iter_time=9.150e-05, forward_time=0.389, loss_ctc=37.943, loss_att=40.112, acc=0.732, loss=39.461, backward_time=0.322, grad_norm=37.861, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.705e-04, train_time=1.401 +[gpub011:0/16] 2024-02-08 19:06:03,793 (trainer:762) INFO: 35epoch:train:5901-6000batch: iter_time=8.036e-05, forward_time=0.291, loss_ctc=45.848, loss_att=43.967, acc=0.758, loss=44.531, backward_time=0.296, grad_norm=39.358, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.705e-04, train_time=1.289 +[gpub011:0/16] 2024-02-08 19:08:01,907 (trainer:762) INFO: 35epoch:train:6001-6100batch: iter_time=8.391e-05, forward_time=0.288, loss_ctc=40.421, loss_att=39.351, acc=0.749, loss=39.672, backward_time=0.294, grad_norm=42.696, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.705e-04, train_time=1.181 +[gpub011:0/16] 2024-02-08 19:10:35,013 (trainer:762) INFO: 35epoch:train:6101-6200batch: iter_time=8.144e-05, forward_time=0.373, loss_ctc=46.913, loss_att=45.078, acc=0.759, loss=45.628, backward_time=0.350, grad_norm=42.332, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.705e-04, train_time=1.531 +[gpub011:0/16] 2024-02-08 19:11:50,464 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub011:0/16] 2024-02-08 19:12:09,773 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 19:12:13,297 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 19:12:13,297 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-08 19:12:13,326 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 19:18:14,845 (trainer:762) INFO: 35epoch:train:6201-6300batch: iter_time=3.329, forward_time=0.294, loss_ctc=48.464, loss_att=51.029, acc=0.759, loss=50.259, backward_time=0.299, grad_norm=40.231, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.705e-04, train_time=4.598 +[gpub011:0/16] 2024-02-08 19:20:28,023 (trainer:762) INFO: 35epoch:train:6301-6400batch: iter_time=7.953e-05, forward_time=0.325, loss_ctc=48.459, loss_att=49.437, acc=0.758, loss=49.144, backward_time=0.297, grad_norm=42.080, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.705e-04, train_time=1.332 +[gpub011:0/16] 2024-02-08 19:22:36,518 (trainer:762) INFO: 35epoch:train:6401-6500batch: iter_time=8.840e-05, forward_time=0.387, loss_ctc=45.396, loss_att=37.598, acc=0.770, loss=39.937, backward_time=0.311, grad_norm=39.310, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.704e-04, train_time=1.284 +[gpub011:0/16] 2024-02-08 19:24:48,639 (trainer:762) INFO: 
35epoch:train:6501-6600batch: iter_time=9.132e-05, forward_time=0.315, loss_ctc=44.760, loss_att=48.182, acc=0.748, loss=47.156, backward_time=0.307, grad_norm=40.693, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.704e-04, train_time=1.321 +[gpub011:0/16] 2024-02-08 19:27:20,669 (trainer:762) INFO: 35epoch:train:6601-6700batch: iter_time=3.974e-04, forward_time=0.382, loss_ctc=47.662, loss_att=50.106, acc=0.742, loss=49.373, backward_time=0.336, grad_norm=43.269, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.704e-04, train_time=1.520 +[gpub011:0/16] 2024-02-08 19:29:12,393 (trainer:762) INFO: 35epoch:train:6701-6800batch: iter_time=9.214e-05, forward_time=0.292, loss_ctc=48.333, loss_att=49.782, acc=0.771, loss=49.347, backward_time=0.299, grad_norm=39.791, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.704e-04, train_time=1.117 +[gpub011:0/16] 2024-02-08 19:31:04,653 (trainer:762) INFO: 35epoch:train:6801-6900batch: iter_time=9.251e-05, forward_time=0.290, loss_ctc=42.841, loss_att=42.201, acc=0.769, loss=42.393, backward_time=0.297, grad_norm=39.374, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.704e-04, train_time=1.122 +[gpub011:0/16] 2024-02-08 19:34:01,732 (trainer:762) INFO: 35epoch:train:6901-7000batch: iter_time=9.549e-05, forward_time=0.363, loss_ctc=45.481, loss_att=40.899, acc=0.762, loss=42.274, backward_time=0.384, grad_norm=43.082, clip=100.000, loss_scale=5.244e+33, optim_step_time=0.107, optim0_lr0=1.704e-04, train_time=1.770 +[gpub011:0/16] 2024-02-08 19:35:53,883 (trainer:762) INFO: 35epoch:train:7001-7100batch: iter_time=8.743e-05, forward_time=0.291, loss_ctc=42.317, loss_att=49.556, acc=0.727, loss=47.384, backward_time=0.297, grad_norm=38.802, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.703e-04, train_time=1.122 +[gpub011:0/16] 2024-02-08 19:37:45,968 (trainer:762) INFO: 35epoch:train:7101-7200batch: 
iter_time=9.046e-05, forward_time=0.288, loss_ctc=40.176, loss_att=40.876, acc=0.742, loss=40.666, backward_time=0.295, grad_norm=37.388, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.703e-04, train_time=1.121 +[gpub011:0/16] 2024-02-08 19:40:53,828 (trainer:762) INFO: 35epoch:train:7201-7300batch: iter_time=8.949e-05, forward_time=0.403, loss_ctc=41.628, loss_att=39.644, acc=0.756, loss=40.239, backward_time=0.324, grad_norm=38.920, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.703e-04, train_time=1.878 +[gpub011:0/16] 2024-02-08 19:42:46,853 (trainer:762) INFO: 35epoch:train:7301-7400batch: iter_time=8.687e-05, forward_time=0.289, loss_ctc=42.846, loss_att=39.231, acc=0.774, loss=40.315, backward_time=0.296, grad_norm=38.829, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.703e-04, train_time=1.130 +[gpub011:0/16] 2024-02-08 19:44:41,931 (trainer:762) INFO: 35epoch:train:7401-7500batch: iter_time=8.511e-05, forward_time=0.293, loss_ctc=51.566, loss_att=54.427, acc=0.744, loss=53.569, backward_time=0.299, grad_norm=47.217, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.703e-04, train_time=1.151 +[gpub011:0/16] 2024-02-08 19:45:03,709 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-08 19:45:23,393 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 19:45:27,208 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 19:45:27,208 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-08 19:45:27,748 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 19:51:52,611 (trainer:762) INFO: 35epoch:train:7501-7600batch: iter_time=3.107, forward_time=0.341, loss_ctc=49.761, loss_att=50.355, acc=0.752, loss=50.177, backward_time=0.307, grad_norm=42.413, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.703e-04, train_time=4.307 +[gpub011:0/16] 2024-02-08 19:53:49,125 (trainer:762) INFO: 35epoch:train:7601-7700batch: iter_time=8.426e-05, forward_time=0.323, loss_ctc=45.900, loss_att=40.864, acc=0.776, loss=42.375, backward_time=0.336, grad_norm=39.454, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.702e-04, train_time=1.165 +[gpub011:0/16] 2024-02-08 19:56:15,008 (trainer:762) INFO: 35epoch:train:7701-7800batch: iter_time=8.606e-05, forward_time=0.337, loss_ctc=45.050, loss_att=48.289, acc=0.746, loss=47.317, backward_time=0.330, grad_norm=40.558, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.702e-04, train_time=1.458 +[gpub011:0/16] 2024-02-08 19:58:28,807 (trainer:762) 
INFO: 35epoch:train:7801-7900batch: iter_time=1.849e-04, forward_time=0.312, loss_ctc=43.123, loss_att=39.631, acc=0.769, loss=40.678, backward_time=0.333, grad_norm=37.318, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.702e-04, train_time=1.338 +[gpub011:0/16] 2024-02-08 20:00:39,251 (trainer:762) INFO: 35epoch:train:7901-8000batch: iter_time=8.809e-05, forward_time=0.367, loss_ctc=52.111, loss_att=54.588, acc=0.753, loss=53.845, backward_time=0.317, grad_norm=43.291, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.702e-04, train_time=1.304 +[gpub011:0/16] 2024-02-08 20:03:10,595 (trainer:762) INFO: 35epoch:train:8001-8100batch: iter_time=8.537e-05, forward_time=0.328, loss_ctc=44.870, loss_att=46.034, acc=0.760, loss=45.685, backward_time=0.374, grad_norm=44.282, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.702e-04, train_time=1.514 +[gpub011:0/16] 2024-02-08 20:03:44,239 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-08 20:05:29,812 (trainer:762) INFO: 35epoch:train:8101-8200batch: iter_time=2.051e-04, forward_time=0.375, loss_ctc=43.310, loss_att=39.193, acc=0.779, loss=40.428, backward_time=0.306, grad_norm=37.288, clip=100.000, loss_scale=6.713e+33, optim_step_time=0.095, optim0_lr0=1.702e-04, train_time=1.392 +[gpub011:0/16] 2024-02-08 20:08:02,413 (trainer:762) INFO: 35epoch:train:8201-8300batch: iter_time=0.001, forward_time=0.348, loss_ctc=45.059, loss_att=48.986, acc=0.744, loss=47.808, backward_time=0.306, grad_norm=42.348, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.124, optim0_lr0=1.701e-04, train_time=1.525 +[gpub011:0/16] 2024-02-08 20:10:03,073 (trainer:762) INFO: 35epoch:train:8301-8400batch: iter_time=7.997e-05, forward_time=0.319, loss_ctc=37.702, loss_att=39.469, acc=0.734, loss=38.939, backward_time=0.338, grad_norm=36.804, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.701e-04, train_time=1.207 +[gpub011:0/16] 2024-02-08 20:12:16,342 (trainer:762) INFO: 35epoch:train:8401-8500batch: iter_time=2.990e-04, forward_time=0.336, loss_ctc=45.461, loss_att=43.895, acc=0.760, loss=44.365, backward_time=0.318, grad_norm=38.243, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.701e-04, train_time=1.333 +[gpub011:0/16] 2024-02-08 20:14:41,798 (trainer:762) INFO: 35epoch:train:8501-8600batch: iter_time=8.409e-05, forward_time=0.287, loss_ctc=39.820, loss_att=38.713, acc=0.754, loss=39.045, backward_time=0.292, grad_norm=43.365, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.701e-04, train_time=1.453 +[gpub011:0/16] 2024-02-08 20:16:41,978 (trainer:762) INFO: 35epoch:train:8601-8700batch: iter_time=2.369e-04, forward_time=0.318, loss_ctc=46.927, loss_att=44.429, acc=0.762, loss=45.178, backward_time=0.330, grad_norm=42.378, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.701e-04, train_time=1.202 +[gpub011:0/16] 2024-02-08 20:18:17,327 
(multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub011:0/16] 2024-02-08 20:18:36,562 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 20:18:40,842 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 20:18:40,842 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-08 20:18:40,845 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 20:24:37,310 (trainer:762) INFO: 35epoch:train:8701-8800batch: iter_time=3.280, forward_time=0.338, loss_ctc=47.680, loss_att=53.307, acc=0.763, loss=51.619, backward_time=0.313, grad_norm=42.356, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.701e-04, train_time=4.754 +[gpub011:0/16] 2024-02-08 20:26:39,425 (trainer:762) INFO: 35epoch:train:8801-8900batch: iter_time=7.482e-05, forward_time=0.344, loss_ctc=48.656, loss_att=49.518, acc=0.768, loss=49.259, backward_time=0.312, grad_norm=41.560, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.700e-04, train_time=1.221 +[gpub011:0/16] 2024-02-08 20:29:16,345 (trainer:762) INFO: 35epoch:train:8901-9000batch: iter_time=8.769e-05, forward_time=0.357, loss_ctc=44.505, loss_att=36.520, acc=0.777, loss=38.915, backward_time=0.305, grad_norm=39.443, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.700e-04, 
train_time=1.569 +[gpub011:0/16] 2024-02-08 20:31:34,613 (trainer:762) INFO: 35epoch:train:9001-9100batch: iter_time=8.683e-05, forward_time=0.291, loss_ctc=44.470, loss_att=50.375, acc=0.759, loss=48.603, backward_time=0.299, grad_norm=38.169, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.700e-04, train_time=1.383 +[gpub011:0/16] 2024-02-08 20:33:50,613 (trainer:762) INFO: 35epoch:train:9101-9200batch: iter_time=8.822e-05, forward_time=0.342, loss_ctc=47.384, loss_att=51.545, acc=0.751, loss=50.297, backward_time=0.313, grad_norm=45.408, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.700e-04, train_time=1.360 +[gpub011:0/16] 2024-02-08 20:35:57,317 (trainer:762) INFO: 35epoch:train:9201-9300batch: iter_time=9.331e-05, forward_time=0.342, loss_ctc=48.142, loss_att=50.090, acc=0.780, loss=49.506, backward_time=0.322, grad_norm=39.352, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.700e-04, train_time=1.267 +[gpub011:0/16] 2024-02-08 20:38:22,997 (trainer:762) INFO: 35epoch:train:9301-9400batch: iter_time=8.361e-05, forward_time=0.290, loss_ctc=42.351, loss_att=42.275, acc=0.779, loss=42.298, backward_time=0.295, grad_norm=37.306, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.700e-04, train_time=1.457 +[gpub011:0/16] 2024-02-08 20:40:51,183 (trainer:762) INFO: 35epoch:train:9401-9500batch: iter_time=1.713e-04, forward_time=0.379, loss_ctc=45.294, loss_att=42.202, acc=0.768, loss=43.130, backward_time=0.335, grad_norm=42.347, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=1.700e-04, train_time=1.482 +[gpub011:0/16] 2024-02-08 20:43:01,096 (trainer:762) INFO: 35epoch:train:9501-9600batch: iter_time=8.735e-05, forward_time=0.292, loss_ctc=42.472, loss_att=51.098, acc=0.735, loss=48.510, backward_time=0.297, grad_norm=38.998, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.699e-04, train_time=1.299 +[gpub011:0/16] 2024-02-08 
20:44:57,302 (trainer:762) INFO: 35epoch:train:9601-9700batch: iter_time=9.006e-05, forward_time=0.288, loss_ctc=39.746, loss_att=41.994, acc=0.755, loss=41.320, backward_time=0.295, grad_norm=36.572, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.699e-04, train_time=1.162 +[gpub011:0/16] 2024-02-08 20:47:42,163 (trainer:762) INFO: 35epoch:train:9701-9800batch: iter_time=0.010, forward_time=0.393, loss_ctc=41.527, loss_att=38.538, acc=0.768, loss=39.434, backward_time=0.314, grad_norm=37.824, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.699e-04, train_time=1.648 +[gpub011:0/16] 2024-02-08 20:49:52,743 (trainer:762) INFO: 35epoch:train:9801-9900batch: iter_time=8.658e-05, forward_time=0.291, loss_ctc=42.643, loss_att=40.686, acc=0.778, loss=41.273, backward_time=0.296, grad_norm=37.929, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.699e-04, train_time=1.306 +[gpub011:0/16] 2024-02-08 20:51:59,050 (trainer:762) INFO: 35epoch:train:9901-10000batch: iter_time=8.262e-05, forward_time=0.296, loss_ctc=50.652, loss_att=54.438, acc=0.750, loss=53.302, backward_time=0.299, grad_norm=47.787, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.699e-04, train_time=1.263 +[gpub011:0/16] 2024-02-08 20:52:19,078 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub011:0/16] 2024-02-08 20:52:38,476 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 20:52:42,006 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 20:52:42,006 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-08 20:52:42,120 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 20:58:59,059 (trainer:762) INFO: 35epoch:train:10001-10100batch: iter_time=2.983, forward_time=0.379, loss_ctc=49.569, loss_att=51.501, acc=0.750, loss=50.921, backward_time=0.313, grad_norm=42.977, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.699e-04, train_time=4.199 +[gpub011:0/16] 2024-02-08 21:01:11,683 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-08 21:01:33,144 (trainer:762) INFO: 35epoch:train:10101-10200batch: iter_time=7.809e-05, forward_time=0.290, loss_ctc=45.616, loss_att=41.025, acc=0.775, loss=42.402, backward_time=0.295, grad_norm=39.773, clip=100.000, loss_scale=8.549e+33, optim_step_time=0.092, optim0_lr0=1.698e-04, train_time=1.541 +[gpub011:0/16] 2024-02-08 21:03:44,595 (trainer:762) INFO: 35epoch:train:10201-10300batch: iter_time=9.795e-04, forward_time=0.407, loss_ctc=44.973, loss_att=48.413, acc=0.744, loss=47.381, backward_time=0.342, grad_norm=38.667, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.698e-04, train_time=1.314 +[gpub011:0/16] 2024-02-08 21:05:36,345 (trainer:762) INFO: 35epoch:train:10301-10400batch: iter_time=8.080e-05, forward_time=0.291, loss_ctc=43.072, loss_att=40.852, acc=0.764, loss=41.518, backward_time=0.297, grad_norm=37.487, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.698e-04, train_time=1.116 +[gpub011:0/16] 2024-02-08 21:08:09,914 (trainer:762) INFO: 35epoch:train:10401-10500batch: iter_time=8.249e-05, forward_time=0.292, loss_ctc=51.402, loss_att=54.047, acc=0.754, loss=53.253, backward_time=0.297, grad_norm=43.113, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.698e-04, train_time=1.537 +[gpub011:0/16] 2024-02-08 21:10:14,198 (trainer:762) INFO: 35epoch:train:10501-10600batch: iter_time=8.035e-05, forward_time=0.371, loss_ctc=44.532, loss_att=46.128, acc=0.760, loss=45.650, backward_time=0.343, grad_norm=41.565, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.698e-04, train_time=1.243 +[gpub011:0/16] 2024-02-08 21:12:06,424 (trainer:762) INFO: 35epoch:train:10601-10700batch: iter_time=8.089e-05, forward_time=0.291, loss_ctc=43.144, loss_att=39.375, acc=0.779, loss=40.506, backward_time=0.297, grad_norm=36.696, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.698e-04, train_time=1.121 +[gpub011:0/16] 2024-02-08 
21:14:55,310 (trainer:762) INFO: 35epoch:train:10701-10800batch: iter_time=3.108e-04, forward_time=0.350, loss_ctc=45.068, loss_att=49.838, acc=0.741, loss=48.407, backward_time=0.362, grad_norm=45.312, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.697e-04, train_time=1.689 +[gpub011:0/16] 2024-02-08 21:16:44,179 (trainer:762) INFO: 35epoch:train:10801-10900batch: iter_time=8.138e-05, forward_time=0.286, loss_ctc=37.492, loss_att=39.801, acc=0.735, loss=39.108, backward_time=0.292, grad_norm=37.464, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.697e-04, train_time=1.089 +[gpub011:0/16] 2024-02-08 21:18:36,060 (trainer:762) INFO: 35epoch:train:10901-11000batch: iter_time=8.235e-05, forward_time=0.292, loss_ctc=45.287, loss_att=43.543, acc=0.761, loss=44.066, backward_time=0.298, grad_norm=38.633, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.697e-04, train_time=1.118 +[gpub011:0/16] 2024-02-08 21:21:30,418 (trainer:762) INFO: 35epoch:train:11001-11100batch: iter_time=3.702e-04, forward_time=0.475, loss_ctc=39.672, loss_att=38.998, acc=0.752, loss=39.200, backward_time=0.314, grad_norm=43.315, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.697e-04, train_time=1.744 +[gpub011:0/16] 2024-02-08 21:23:24,395 (trainer:762) INFO: 35epoch:train:11101-11200batch: iter_time=8.145e-05, forward_time=0.289, loss_ctc=46.528, loss_att=44.699, acc=0.761, loss=45.248, backward_time=0.296, grad_norm=42.214, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.697e-04, train_time=1.140 +[gpub011:0/16] 2024-02-08 21:24:40,482 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-08 21:24:59,557 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 21:25:03,073 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 21:25:03,073 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-08 21:25:03,076 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 21:31:05,288 (trainer:762) INFO: 35epoch:train:11201-11300batch: iter_time=3.367, forward_time=0.391, loss_ctc=47.328, loss_att=52.009, acc=0.766, loss=50.605, backward_time=0.320, grad_norm=43.259, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.697e-04, train_time=4.608 +[gpub011:0/16] 2024-02-08 21:33:23,575 (trainer:762) INFO: 35epoch:train:11301-11400batch: iter_time=8.186e-05, forward_time=0.291, loss_ctc=47.756, loss_att=49.340, acc=0.766, loss=48.865, backward_time=0.298, grad_norm=40.045, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.696e-04, train_time=1.383 +[gpub011:0/16] 2024-02-08 21:35:31,876 (trainer:762) INFO: 35epoch:train:11401-11500batch: iter_time=7.973e-05, forward_time=0.419, loss_ctc=44.977, loss_att=36.929, acc=0.775, loss=39.343, backward_time=0.314, grad_norm=39.771, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.696e-04, train_time=1.282 +[gpub011:0/16] 2024-02-08 21:37:09,795 
(trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-08 21:37:42,660 (trainer:762) INFO: 35epoch:train:11501-11600batch: iter_time=8.252e-05, forward_time=0.293, loss_ctc=44.107, loss_att=50.076, acc=0.760, loss=48.286, backward_time=0.299, grad_norm=39.493, clip=100.000, loss_scale=4.484e+33, optim_step_time=0.093, optim0_lr0=1.696e-04, train_time=1.308 +[gpub011:0/16] 2024-02-08 21:40:07,349 (trainer:762) INFO: 35epoch:train:11601-11700batch: iter_time=8.242e-05, forward_time=0.387, loss_ctc=46.879, loss_att=49.905, acc=0.755, loss=48.997, backward_time=0.331, grad_norm=42.329, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.696e-04, train_time=1.447 +[gpub011:0/16] 2024-02-08 21:42:42,046 (trainer:762) INFO: 35epoch:train:11701-11800batch: iter_time=8.491e-05, forward_time=0.418, loss_ctc=47.768, loss_att=49.376, acc=0.782, loss=48.893, backward_time=0.342, grad_norm=40.306, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.696e-04, train_time=1.547 +[gpub011:0/16] 2024-02-08 21:44:43,863 (trainer:762) INFO: 35epoch:train:11801-11900batch: iter_time=8.267e-05, forward_time=0.290, loss_ctc=41.948, loss_att=42.357, acc=0.778, loss=42.234, backward_time=0.298, grad_norm=39.718, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.696e-04, train_time=1.217 +[gpub011:0/16] 2024-02-08 21:47:13,000 (trainer:762) INFO: 35epoch:train:11901-12000batch: iter_time=7.876e-05, forward_time=0.370, loss_ctc=44.741, loss_att=42.366, acc=0.767, loss=43.079, backward_time=0.364, grad_norm=44.210, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.695e-04, train_time=1.492 +[gpub011:0/16] 2024-02-08 21:49:26,143 (trainer:762) INFO: 35epoch:train:12001-12100batch: iter_time=8.484e-05, forward_time=0.315, loss_ctc=41.803, loss_att=50.899, acc=0.733, loss=48.170, backward_time=0.296, grad_norm=39.359, clip=100.000, loss_scale=2.596e+33, 
optim_step_time=0.093, optim0_lr0=1.695e-04, train_time=1.331 +[gpub011:0/16] 2024-02-08 21:51:48,444 (trainer:762) INFO: 35epoch:train:12101-12200batch: iter_time=4.157e-04, forward_time=0.390, loss_ctc=39.469, loss_att=41.190, acc=0.758, loss=40.673, backward_time=0.310, grad_norm=37.683, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.105, optim0_lr0=1.695e-04, train_time=1.423 +[gpub011:0/16] 2024-02-08 21:54:01,179 (trainer:762) INFO: 35epoch:train:12201-12300batch: iter_time=8.327e-05, forward_time=0.313, loss_ctc=41.039, loss_att=38.570, acc=0.772, loss=39.310, backward_time=0.299, grad_norm=39.219, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.695e-04, train_time=1.328 +[gpub011:0/16] 2024-02-08 21:56:00,264 (trainer:762) INFO: 35epoch:train:12301-12400batch: iter_time=8.400e-05, forward_time=0.291, loss_ctc=42.720, loss_att=41.181, acc=0.777, loss=41.643, backward_time=0.298, grad_norm=39.490, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.695e-04, train_time=1.190 +[gpub011:0/16] 2024-02-08 21:58:22,275 (trainer:762) INFO: 35epoch:train:12401-12500batch: iter_time=8.745e-05, forward_time=0.293, loss_ctc=50.480, loss_att=54.874, acc=0.747, loss=53.556, backward_time=0.299, grad_norm=45.189, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.695e-04, train_time=1.420 +[gpub011:0/16] 2024-02-08 21:58:42,377 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-08 21:59:01,622 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 21:59:05,228 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 21:59:05,229 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-08 21:59:05,234 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 22:05:22,645 (trainer:762) INFO: 35epoch:train:12501-12600batch: iter_time=2.997, forward_time=0.376, loss_ctc=49.779, loss_att=50.558, acc=0.764, loss=50.324, backward_time=0.315, grad_norm=40.296, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.694e-04, train_time=4.203 +[gpub011:0/16] 2024-02-08 22:07:34,873 (trainer:762) INFO: 35epoch:train:12601-12700batch: iter_time=8.326e-05, forward_time=0.289, loss_ctc=45.690, loss_att=40.432, acc=0.786, loss=42.009, backward_time=0.296, grad_norm=38.250, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.694e-04, train_time=1.322 +[gpub011:0/16] 2024-02-08 22:09:49,105 (trainer:762) INFO: 35epoch:train:12701-12800batch: iter_time=8.435e-05, forward_time=0.290, loss_ctc=44.429, loss_att=47.755, acc=0.756, loss=46.757, backward_time=0.296, grad_norm=39.915, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.694e-04, train_time=1.343 +[gpub011:0/16] 2024-02-08 22:11:56,596 (trainer:762) 
INFO: 35epoch:train:12801-12900batch: iter_time=3.370e-04, forward_time=0.398, loss_ctc=42.789, loss_att=42.137, acc=0.773, loss=42.333, backward_time=0.320, grad_norm=38.203, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=1.694e-04, train_time=1.274 +[gpub011:0/16] 2024-02-08 22:14:03,801 (trainer:762) INFO: 35epoch:train:12901-13000batch: iter_time=8.662e-05, forward_time=0.294, loss_ctc=51.502, loss_att=54.399, acc=0.758, loss=53.530, backward_time=0.301, grad_norm=44.360, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.694e-04, train_time=1.272 +[gpub011:0/16] 2024-02-08 22:16:27,743 (trainer:762) INFO: 35epoch:train:13001-13100batch: iter_time=9.292e-05, forward_time=0.290, loss_ctc=44.022, loss_att=45.925, acc=0.771, loss=45.354, backward_time=0.296, grad_norm=42.108, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.694e-04, train_time=1.439 +[gpub011:0/16] 2024-02-08 22:18:35,170 (trainer:762) INFO: 35epoch:train:13101-13200batch: iter_time=7.688e-04, forward_time=0.400, loss_ctc=43.482, loss_att=40.484, acc=0.788, loss=41.384, backward_time=0.331, grad_norm=36.618, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.693e-04, train_time=1.274 +[gpub011:0/16] 2024-02-08 22:20:48,146 (trainer:762) INFO: 35epoch:train:13201-13300batch: iter_time=8.557e-05, forward_time=0.316, loss_ctc=44.772, loss_att=48.718, acc=0.754, loss=47.534, backward_time=0.299, grad_norm=45.214, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.693e-04, train_time=1.329 +[gpub011:0/16] 2024-02-08 22:23:12,375 (trainer:762) INFO: 35epoch:train:13301-13400batch: iter_time=8.709e-05, forward_time=0.287, loss_ctc=37.393, loss_att=40.554, acc=0.743, loss=39.606, backward_time=0.291, grad_norm=36.669, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.693e-04, train_time=1.443 +[gpub011:0/16] 2024-02-08 22:25:01,457 (trainer:762) INFO: 
35epoch:train:13401-13500batch: iter_time=8.873e-05, forward_time=0.292, loss_ctc=45.378, loss_att=45.411, acc=0.767, loss=45.401, backward_time=0.299, grad_norm=37.310, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.693e-04, train_time=1.090 +[gpub011:0/16] 2024-02-08 22:27:36,472 (trainer:762) INFO: 35epoch:train:13501-13600batch: iter_time=9.312e-05, forward_time=0.373, loss_ctc=39.617, loss_att=39.914, acc=0.760, loss=39.825, backward_time=0.340, grad_norm=41.268, clip=100.000, loss_scale=3.297e+33, optim_step_time=0.100, optim0_lr0=1.693e-04, train_time=1.550 +[gpub011:0/16] 2024-02-08 22:29:49,637 (trainer:762) INFO: 35epoch:train:13601-13700batch: iter_time=8.684e-05, forward_time=0.290, loss_ctc=46.498, loss_att=44.716, acc=0.768, loss=45.250, backward_time=0.296, grad_norm=41.606, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.693e-04, train_time=1.331 +[gpub011:0/16] 2024-02-08 22:31:04,564 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-08 22:31:24,126 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 22:31:27,651 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 22:31:27,651 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-08 22:31:27,670 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 22:38:02,019 (trainer:762) INFO: 35epoch:train:13701-13800batch: iter_time=3.447, forward_time=0.415, loss_ctc=47.376, loss_att=52.585, acc=0.757, loss=51.022, backward_time=0.321, grad_norm=40.850, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.693e-04, train_time=4.924 +[gpub011:0/16] 2024-02-08 22:40:26,706 (trainer:762) INFO: 35epoch:train:13801-13900batch: iter_time=8.454e-05, forward_time=0.290, loss_ctc=48.164, loss_att=50.192, acc=0.755, loss=49.584, backward_time=0.296, grad_norm=43.181, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.692e-04, train_time=1.446 +[gpub011:0/16] 2024-02-08 22:42:35,164 (trainer:762) INFO: 35epoch:train:13901-14000batch: iter_time=2.080e-04, forward_time=0.384, loss_ctc=44.964, loss_att=36.956, acc=0.774, loss=39.358, backward_time=0.330, grad_norm=39.673, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.692e-04, train_time=1.285 +[gpub011:0/16] 2024-02-08 22:45:16,456 (trainer:762) 
INFO: 35epoch:train:14001-14100batch: iter_time=8.961e-05, forward_time=0.337, loss_ctc=44.514, loss_att=48.795, acc=0.750, loss=47.511, backward_time=0.369, grad_norm=39.825, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.692e-04, train_time=1.613 +[gpub011:0/16] 2024-02-08 22:47:29,304 (trainer:762) INFO: 35epoch:train:14101-14200batch: iter_time=8.062e-05, forward_time=0.289, loss_ctc=47.058, loss_att=50.348, acc=0.744, loss=49.361, backward_time=0.296, grad_norm=42.994, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.692e-04, train_time=1.328 +[gpub011:0/16] 2024-02-08 22:49:56,249 (trainer:762) INFO: 35epoch:train:14201-14300batch: iter_time=2.434e-04, forward_time=0.359, loss_ctc=47.510, loss_att=49.310, acc=0.773, loss=48.770, backward_time=0.385, grad_norm=39.118, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.692e-04, train_time=1.469 +[gpub011:0/16] 2024-02-08 22:52:19,182 (trainer:762) INFO: 35epoch:train:14301-14400batch: iter_time=8.250e-05, forward_time=0.291, loss_ctc=41.962, loss_att=41.535, acc=0.771, loss=41.663, backward_time=0.295, grad_norm=38.412, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.692e-04, train_time=1.430 +[gpub011:0/16] 2024-02-08 22:54:32,164 (trainer:762) INFO: 35epoch:train:14401-14500batch: iter_time=8.527e-05, forward_time=0.365, loss_ctc=44.822, loss_att=40.087, acc=0.765, loss=41.507, backward_time=0.415, grad_norm=41.606, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.691e-04, train_time=1.330 +[gpub011:0/16] 2024-02-08 22:56:54,728 (trainer:762) INFO: 35epoch:train:14501-14600batch: iter_time=8.515e-05, forward_time=0.289, loss_ctc=41.578, loss_att=48.627, acc=0.732, loss=46.512, backward_time=0.295, grad_norm=38.349, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.691e-04, train_time=1.425 +[gpub011:0/16] 2024-02-08 22:59:26,260 (trainer:762) INFO: 
35epoch:train:14601-14700batch: iter_time=8.721e-05, forward_time=0.403, loss_ctc=39.454, loss_att=40.896, acc=0.742, loss=40.464, backward_time=0.343, grad_norm=36.429, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.691e-04, train_time=1.515 +[gpub011:0/16] 2024-02-08 23:01:16,628 (trainer:762) INFO: 35epoch:train:14701-14800batch: iter_time=7.930e-05, forward_time=0.289, loss_ctc=40.869, loss_att=38.929, acc=0.761, loss=39.511, backward_time=0.295, grad_norm=38.398, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.691e-04, train_time=1.104 +[gpub011:0/16] 2024-02-08 23:03:52,005 (trainer:762) INFO: 35epoch:train:14801-14900batch: iter_time=3.847e-04, forward_time=0.354, loss_ctc=42.701, loss_att=39.480, acc=0.775, loss=40.446, backward_time=0.339, grad_norm=39.202, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.691e-04, train_time=1.553 +[gpub011:0/16] 2024-02-08 23:06:08,993 (trainer:762) INFO: 35epoch:train:14901-15000batch: iter_time=8.532e-05, forward_time=0.292, loss_ctc=50.772, loss_att=54.006, acc=0.746, loss=53.036, backward_time=0.298, grad_norm=48.033, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.691e-04, train_time=1.370 +[gpub011:0/16] 2024-02-08 23:44:34,678 (trainer:361) INFO: 35epoch results: [train] iter_time=0.264, forward_time=0.331, loss_ctc=45.292, loss_att=45.681, acc=0.759, loss=45.564, backward_time=0.313, grad_norm=41.074, clip=100.000, loss_scale=5.891e+33, optim_step_time=0.096, optim0_lr0=1.703e-04, train_time=1.599, time=6 hours, 40 minutes and 18.97 seconds, total_count=555000, gpu_max_cached_mem_GB=43.805, [valid] loss_ctc=35.486, cer_ctc=0.180, loss_att=39.286, acc=0.674, cer=0.360, wer=1.000, loss=38.146, time=38 minutes and 1.37 seconds, total_count=172827, gpu_max_cached_mem_GB=43.805 +[gpub011:0/16] 2024-02-08 23:44:44,073 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub011:0/16] 2024-02-08 
23:44:44,229 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/30epoch.pth +[gpub011:0/16] 2024-02-08 23:44:44,230 (trainer:290) INFO: 36/45epoch started. Estimated time to finish: 3 days, 2 hours and 37 minutes +[gpub011:0/16] 2024-02-08 23:44:44,240 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-08 23:45:02,702 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-08 23:45:06,037 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-08 23:45:06,037 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-08 23:45:06,041 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-08 23:52:11,802 (trainer:762) INFO: 36epoch:train:1-100batch: iter_time=3.271, forward_time=0.380, loss_ctc=55.520, loss_att=53.362, acc=0.730, loss=54.010, backward_time=0.312, grad_norm=56.776, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.690e-04, train_time=4.475 +[gpub011:0/16] 2024-02-08 23:54:07,592 (trainer:762) INFO: 36epoch:train:101-200batch: iter_time=8.964e-05, forward_time=0.289, loss_ctc=50.599, loss_att=47.254, acc=0.740, loss=48.257, backward_time=0.296, grad_norm=46.178, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.690e-04, 
train_time=1.158 +[gpub011:0/16] 2024-02-08 23:56:49,513 (trainer:762) INFO: 36epoch:train:201-300batch: iter_time=4.791e-04, forward_time=0.377, loss_ctc=56.336, loss_att=42.323, acc=0.748, loss=46.527, backward_time=0.326, grad_norm=46.904, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.690e-04, train_time=1.619 +[gpub011:0/16] 2024-02-08 23:58:56,273 (trainer:762) INFO: 36epoch:train:301-400batch: iter_time=9.297e-05, forward_time=0.292, loss_ctc=48.413, loss_att=51.873, acc=0.740, loss=50.835, backward_time=0.298, grad_norm=40.777, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.690e-04, train_time=1.267 +[gpub011:0/16] 2024-02-09 00:01:14,702 (trainer:762) INFO: 36epoch:train:401-500batch: iter_time=9.008e-05, forward_time=0.290, loss_ctc=48.716, loss_att=50.913, acc=0.743, loss=50.254, backward_time=0.294, grad_norm=42.127, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.690e-04, train_time=1.384 +[gpub011:0/16] 2024-02-09 00:03:37,206 (trainer:762) INFO: 36epoch:train:501-600batch: iter_time=9.277e-05, forward_time=0.361, loss_ctc=48.919, loss_att=51.870, acc=0.754, loss=50.984, backward_time=0.381, grad_norm=43.525, clip=100.000, loss_scale=6.594e+33, optim_step_time=0.098, optim0_lr0=1.690e-04, train_time=1.423 +[gpub011:0/16] 2024-02-09 00:05:54,635 (trainer:762) INFO: 36epoch:train:601-700batch: iter_time=8.838e-05, forward_time=0.293, loss_ctc=56.065, loss_att=50.269, acc=0.753, loss=52.008, backward_time=0.296, grad_norm=47.324, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.689e-04, train_time=1.375 +[gpub011:0/16] 2024-02-09 00:08:05,254 (trainer:762) INFO: 36epoch:train:701-800batch: iter_time=8.938e-05, forward_time=0.288, loss_ctc=46.160, loss_att=42.762, acc=0.752, loss=43.781, backward_time=0.295, grad_norm=43.400, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.689e-04, train_time=1.306 +[gpub011:0/16] 2024-02-09 
00:10:09,330 (trainer:762) INFO: 36epoch:train:801-900batch: iter_time=9.187e-05, forward_time=0.292, loss_ctc=54.466, loss_att=53.232, acc=0.752, loss=53.602, backward_time=0.298, grad_norm=45.339, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.689e-04, train_time=1.241 +[gpub011:0/16] 2024-02-09 00:12:41,213 (trainer:762) INFO: 36epoch:train:901-1000batch: iter_time=9.122e-05, forward_time=0.413, loss_ctc=52.733, loss_att=47.595, acc=0.742, loss=49.136, backward_time=0.336, grad_norm=50.043, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.689e-04, train_time=1.518 +[gpub011:0/16] 2024-02-09 00:14:50,094 (trainer:762) INFO: 36epoch:train:1001-1100batch: iter_time=8.550e-05, forward_time=0.289, loss_ctc=49.904, loss_att=46.845, acc=0.741, loss=47.762, backward_time=0.295, grad_norm=45.971, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.689e-04, train_time=1.289 +[gpub011:0/16] 2024-02-09 00:17:08,511 (trainer:762) INFO: 36epoch:train:1101-1200batch: iter_time=8.717e-05, forward_time=0.289, loss_ctc=46.462, loss_att=44.912, acc=0.741, loss=45.377, backward_time=0.295, grad_norm=43.725, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.689e-04, train_time=1.384 +[gpub011:0/16] 2024-02-09 00:18:31,406 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-09 00:18:50,673 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 00:18:54,148 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 00:18:54,148 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-09 00:18:54,238 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 00:24:58,558 (trainer:762) INFO: 36epoch:train:1201-1300batch: iter_time=3.478, forward_time=0.290, loss_ctc=48.101, loss_att=52.399, acc=0.719, loss=51.109, backward_time=0.296, grad_norm=47.966, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.688e-04, train_time=4.700 +[gpub011:0/16] 2024-02-09 00:27:24,774 (trainer:762) INFO: 36epoch:train:1301-1400batch: iter_time=8.900e-05, forward_time=0.427, loss_ctc=56.982, loss_att=51.472, acc=0.743, loss=53.125, backward_time=0.334, grad_norm=60.528, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.688e-04, train_time=1.461 +[gpub011:0/16] 2024-02-09 00:28:33,511 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 00:29:37,505 (trainer:762) INFO: 36epoch:train:1401-1500batch: iter_time=8.068e-05, forward_time=0.292, loss_ctc=56.077, loss_att=47.273, acc=0.743, loss=49.914, backward_time=0.295, grad_norm=43.703, clip=100.000, loss_scale=8.024e+33, optim_step_time=0.093, optim0_lr0=1.688e-04, train_time=1.328 +[gpub011:0/16] 2024-02-09 00:31:40,012 (trainer:762) INFO: 36epoch:train:1501-1600batch: iter_time=8.082e-05, forward_time=0.289, loss_ctc=44.167, loss_att=40.885, acc=0.747, loss=41.870, backward_time=0.293, grad_norm=39.740, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.688e-04, train_time=1.225 +[gpub011:0/16] 2024-02-09 00:34:01,153 (trainer:762) INFO: 36epoch:train:1601-1700batch: iter_time=8.832e-05, forward_time=0.347, loss_ctc=51.756, loss_att=58.441, acc=0.734, loss=56.436, backward_time=0.298, grad_norm=44.187, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.688e-04, train_time=1.411 +[gpub011:0/16] 2024-02-09 00:36:15,401 (trainer:762) INFO: 36epoch:train:1701-1800batch: iter_time=9.066e-05, forward_time=0.387, loss_ctc=46.158, loss_att=47.449, acc=0.766, loss=47.061, backward_time=0.326, grad_norm=42.165, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.688e-04, train_time=1.342 +[gpub011:0/16] 2024-02-09 00:38:38,223 (trainer:762) INFO: 36epoch:train:1801-1900batch: iter_time=8.663e-05, forward_time=0.291, loss_ctc=53.608, loss_att=53.314, acc=0.743, loss=53.402, backward_time=0.295, grad_norm=49.441, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.688e-04, train_time=1.429 +[gpub011:0/16] 2024-02-09 00:40:49,260 (trainer:762) INFO: 36epoch:train:1901-2000batch: iter_time=8.303e-05, forward_time=0.290, loss_ctc=46.562, loss_att=42.687, acc=0.760, loss=43.850, backward_time=0.296, grad_norm=40.891, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.687e-04, train_time=1.310 +[gpub011:0/16] 2024-02-09 00:43:05,340 
(trainer:762) INFO: 36epoch:train:2001-2100batch: iter_time=8.693e-05, forward_time=0.441, loss_ctc=48.471, loss_att=48.608, acc=0.750, loss=48.567, backward_time=0.337, grad_norm=42.602, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.687e-04, train_time=1.361 +[gpub011:0/16] 2024-02-09 00:45:17,960 (trainer:762) INFO: 36epoch:train:2101-2200batch: iter_time=8.677e-05, forward_time=0.298, loss_ctc=54.855, loss_att=50.429, acc=0.755, loss=51.757, backward_time=0.297, grad_norm=46.671, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.687e-04, train_time=1.325 +[gpub011:0/16] 2024-02-09 00:47:34,465 (trainer:762) INFO: 36epoch:train:2201-2300batch: iter_time=8.432e-05, forward_time=0.288, loss_ctc=48.152, loss_att=46.316, acc=0.748, loss=46.867, backward_time=0.293, grad_norm=49.159, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.687e-04, train_time=1.366 +[gpub011:0/16] 2024-02-09 00:49:46,284 (trainer:762) INFO: 36epoch:train:2301-2400batch: iter_time=8.544e-05, forward_time=0.304, loss_ctc=46.856, loss_att=43.854, acc=0.736, loss=44.755, backward_time=0.294, grad_norm=39.808, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.687e-04, train_time=1.318 +[gpub011:0/16] 2024-02-09 00:51:53,169 (trainer:762) INFO: 36epoch:train:2401-2500batch: iter_time=8.054e-05, forward_time=0.367, loss_ctc=49.134, loss_att=51.075, acc=0.734, loss=50.493, backward_time=0.371, grad_norm=45.702, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.687e-04, train_time=1.269 +[gpub011:0/16] 2024-02-09 00:52:13,215 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-09 00:52:32,084 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 00:52:35,972 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 00:52:35,972 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-09 00:52:35,976 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 00:59:00,011 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 00:59:27,008 (trainer:762) INFO: 36epoch:train:2501-2600batch: iter_time=3.202, forward_time=0.291, loss_ctc=54.506, loss_att=55.920, acc=0.737, loss=55.496, backward_time=0.297, grad_norm=57.865, clip=100.000, loss_scale=4.694e+33, optim_step_time=0.093, optim0_lr0=1.686e-04, train_time=4.538 +[gpub011:0/16] 2024-02-09 01:01:20,099 (trainer:762) INFO: 36epoch:train:2601-2700batch: iter_time=8.055e-05, forward_time=0.290, loss_ctc=49.114, loss_att=47.359, acc=0.758, loss=47.885, backward_time=0.298, grad_norm=43.253, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.686e-04, train_time=1.131 +[gpub011:0/16] 2024-02-09 01:03:27,042 (trainer:762) INFO: 36epoch:train:2701-2800batch: iter_time=8.253e-05, forward_time=0.293, loss_ctc=52.924, loss_att=43.449, acc=0.757, loss=46.291, backward_time=0.306, grad_norm=41.616, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.686e-04, train_time=1.269 +[gpub011:0/16] 2024-02-09 01:05:57,483 (trainer:762) INFO: 36epoch:train:2801-2900batch: iter_time=8.604e-05, forward_time=0.393, loss_ctc=47.349, loss_att=51.400, acc=0.754, loss=50.184, backward_time=0.329, grad_norm=42.042, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.686e-04, train_time=1.504 +[gpub011:0/16] 2024-02-09 01:07:53,434 (trainer:762) INFO: 36epoch:train:2901-3000batch: iter_time=8.326e-05, forward_time=0.293, loss_ctc=47.600, loss_att=50.837, acc=0.755, loss=49.866, backward_time=0.300, grad_norm=40.037, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.686e-04, train_time=1.159 +[gpub011:0/16] 2024-02-09 01:10:06,429 (trainer:762) INFO: 36epoch:train:3001-3100batch: iter_time=8.609e-05, forward_time=0.291, loss_ctc=48.052, loss_att=51.569, acc=0.759, loss=50.514, backward_time=0.297, grad_norm=41.270, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.686e-04, train_time=1.330 +[gpub011:0/16] 2024-02-09 01:12:20,306 
(trainer:762) INFO: 36epoch:train:3101-3200batch: iter_time=8.816e-05, forward_time=0.323, loss_ctc=52.594, loss_att=49.027, acc=0.763, loss=50.097, backward_time=0.299, grad_norm=43.330, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.685e-04, train_time=1.338 +[gpub011:0/16] 2024-02-09 01:14:46,256 (trainer:762) INFO: 36epoch:train:3201-3300batch: iter_time=8.569e-05, forward_time=0.390, loss_ctc=44.558, loss_att=42.779, acc=0.761, loss=43.313, backward_time=0.321, grad_norm=40.512, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.685e-04, train_time=1.460 +[gpub011:0/16] 2024-02-09 01:16:44,830 (trainer:762) INFO: 36epoch:train:3301-3400batch: iter_time=8.315e-05, forward_time=0.292, loss_ctc=52.906, loss_att=52.481, acc=0.759, loss=52.609, backward_time=0.299, grad_norm=45.585, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.685e-04, train_time=1.185 +[gpub011:0/16] 2024-02-09 01:19:05,916 (trainer:762) INFO: 36epoch:train:3401-3500batch: iter_time=8.533e-05, forward_time=0.291, loss_ctc=51.175, loss_att=47.719, acc=0.755, loss=48.756, backward_time=0.298, grad_norm=48.129, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.685e-04, train_time=1.411 +[gpub011:0/16] 2024-02-09 01:21:34,588 (trainer:762) INFO: 36epoch:train:3501-3600batch: iter_time=2.603e-04, forward_time=0.405, loss_ctc=49.019, loss_att=46.140, acc=0.755, loss=47.003, backward_time=0.321, grad_norm=42.607, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.685e-04, train_time=1.486 +[gpub011:0/16] 2024-02-09 01:23:32,582 (trainer:762) INFO: 36epoch:train:3601-3700batch: iter_time=8.298e-05, forward_time=0.311, loss_ctc=44.895, loss_att=43.068, acc=0.765, loss=43.616, backward_time=0.298, grad_norm=43.369, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.685e-04, train_time=1.180 +[gpub011:0/16] 2024-02-09 01:24:47,147 (multiple_iter_factory:32) INFO: Building 
3th iter-factory... +[gpub011:0/16] 2024-02-09 01:25:06,361 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 01:25:09,923 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 01:25:09,923 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-09 01:25:09,929 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 01:31:06,653 (trainer:762) INFO: 36epoch:train:3701-3800batch: iter_time=3.315, forward_time=0.291, loss_ctc=47.427, loss_att=52.648, acc=0.730, loss=51.082, backward_time=0.298, grad_norm=46.952, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.685e-04, train_time=4.540 +[gpub011:0/16] 2024-02-09 01:33:03,153 (trainer:762) INFO: 36epoch:train:3801-3900batch: iter_time=7.524e-05, forward_time=0.292, loss_ctc=57.404, loss_att=51.324, acc=0.745, loss=53.148, backward_time=0.299, grad_norm=59.083, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.684e-04, train_time=1.165 +[gpub011:0/16] 2024-02-09 01:35:33,556 (trainer:762) INFO: 36epoch:train:3901-4000batch: iter_time=8.638e-05, forward_time=0.380, loss_ctc=53.847, loss_att=46.714, acc=0.747, loss=48.854, backward_time=0.329, grad_norm=40.049, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=1.684e-04, train_time=1.504 +[gpub011:0/16] 2024-02-09 
01:37:47,942 (trainer:762) INFO: 36epoch:train:4001-4100batch: iter_time=8.612e-05, forward_time=0.289, loss_ctc=44.265, loss_att=40.842, acc=0.749, loss=41.869, backward_time=0.295, grad_norm=38.564, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.684e-04, train_time=1.344 +[gpub011:0/16] 2024-02-09 01:39:42,290 (trainer:762) INFO: 36epoch:train:4101-4200batch: iter_time=8.378e-05, forward_time=0.294, loss_ctc=51.437, loss_att=58.772, acc=0.735, loss=56.571, backward_time=0.301, grad_norm=42.729, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.684e-04, train_time=1.143 +[gpub011:0/16] 2024-02-09 01:41:51,923 (trainer:762) INFO: 36epoch:train:4201-4300batch: iter_time=8.737e-05, forward_time=0.292, loss_ctc=45.574, loss_att=48.067, acc=0.767, loss=47.319, backward_time=0.298, grad_norm=39.616, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.684e-04, train_time=1.297 +[gpub011:0/16] 2024-02-09 01:44:20,870 (trainer:762) INFO: 36epoch:train:4301-4400batch: iter_time=8.161e-05, forward_time=0.382, loss_ctc=53.060, loss_att=52.078, acc=0.748, loss=52.372, backward_time=0.350, grad_norm=46.966, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.684e-04, train_time=1.489 +[gpub011:0/16] 2024-02-09 01:46:36,180 (trainer:762) INFO: 36epoch:train:4401-4500batch: iter_time=8.336e-05, forward_time=0.290, loss_ctc=45.752, loss_att=42.426, acc=0.761, loss=43.424, backward_time=0.298, grad_norm=40.938, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.683e-04, train_time=1.353 +[gpub011:0/16] 2024-02-09 01:48:33,280 (trainer:762) INFO: 36epoch:train:4501-4600batch: iter_time=8.351e-05, forward_time=0.292, loss_ctc=47.685, loss_att=47.724, acc=0.754, loss=47.712, backward_time=0.297, grad_norm=43.272, clip=100.000, loss_scale=3.089e+33, optim_step_time=0.093, optim0_lr0=1.683e-04, train_time=1.170 +[gpub011:0/16] 2024-02-09 01:51:09,693 (trainer:762) INFO: 
36epoch:train:4601-4700batch: iter_time=8.292e-05, forward_time=0.293, loss_ctc=53.800, loss_att=50.058, acc=0.757, loss=51.180, backward_time=0.296, grad_norm=44.899, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.683e-04, train_time=1.565 +[gpub011:0/16] 2024-02-09 01:53:32,822 (trainer:762) INFO: 36epoch:train:4701-4800batch: iter_time=4.465e-04, forward_time=0.392, loss_ctc=48.096, loss_att=45.918, acc=0.750, loss=46.571, backward_time=0.344, grad_norm=49.383, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.683e-04, train_time=1.430 +[gpub011:0/16] 2024-02-09 01:55:26,157 (trainer:762) INFO: 36epoch:train:4801-4900batch: iter_time=7.846e-05, forward_time=0.290, loss_ctc=47.308, loss_att=44.309, acc=0.735, loss=45.209, backward_time=0.297, grad_norm=41.842, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.683e-04, train_time=1.134 +[gpub011:0/16] 2024-02-09 01:57:41,164 (trainer:762) INFO: 36epoch:train:4901-5000batch: iter_time=7.952e-05, forward_time=0.291, loss_ctc=48.801, loss_att=50.712, acc=0.736, loss=50.138, backward_time=0.298, grad_norm=44.352, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.683e-04, train_time=1.350 +[gpub011:0/16] 2024-02-09 01:58:01,216 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub011:0/16] 2024-02-09 01:58:20,271 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 01:58:23,884 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 01:58:23,885 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-09 01:58:23,890 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 02:05:26,136 (trainer:762) INFO: 36epoch:train:5001-5100batch: iter_time=3.439, forward_time=0.297, loss_ctc=53.107, loss_att=54.339, acc=0.742, loss=53.970, backward_time=0.298, grad_norm=52.821, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.682e-04, train_time=4.649 +[gpub011:0/16] 2024-02-09 02:08:02,507 (trainer:762) INFO: 36epoch:train:5101-5200batch: iter_time=8.593e-05, forward_time=0.421, loss_ctc=48.974, loss_att=46.831, acc=0.759, loss=47.474, backward_time=0.329, grad_norm=44.720, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.682e-04, train_time=1.563 +[gpub011:0/16] 2024-02-09 02:10:19,060 (trainer:762) INFO: 36epoch:train:5201-5300batch: iter_time=8.461e-05, forward_time=0.309, loss_ctc=52.532, loss_att=42.091, acc=0.767, loss=45.223, backward_time=0.295, grad_norm=44.576, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.682e-04, train_time=1.366 +[gpub011:0/16] 2024-02-09 02:12:26,367 (trainer:762) INFO: 
36epoch:train:5301-5400batch: iter_time=8.307e-05, forward_time=0.313, loss_ctc=46.797, loss_att=51.406, acc=0.754, loss=50.023, backward_time=0.302, grad_norm=40.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.682e-04, train_time=1.273 +[gpub011:0/16] 2024-02-09 02:14:54,577 (trainer:762) INFO: 36epoch:train:5401-5500batch: iter_time=8.193e-05, forward_time=0.396, loss_ctc=47.376, loss_att=50.371, acc=0.757, loss=49.473, backward_time=0.338, grad_norm=41.183, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.682e-04, train_time=1.482 +[gpub011:0/16] 2024-02-09 02:17:05,356 (trainer:762) INFO: 36epoch:train:5501-5600batch: iter_time=8.462e-05, forward_time=0.291, loss_ctc=47.125, loss_att=51.086, acc=0.761, loss=49.898, backward_time=0.297, grad_norm=40.527, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.682e-04, train_time=1.307 +[gpub011:0/16] 2024-02-09 02:19:09,691 (trainer:762) INFO: 36epoch:train:5601-5700batch: iter_time=8.419e-05, forward_time=0.293, loss_ctc=52.339, loss_att=48.507, acc=0.764, loss=49.657, backward_time=0.299, grad_norm=46.183, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.681e-04, train_time=1.244 +[gpub011:0/16] 2024-02-09 02:21:20,416 (trainer:762) INFO: 36epoch:train:5701-5800batch: iter_time=4.550e-04, forward_time=0.306, loss_ctc=44.406, loss_att=42.061, acc=0.765, loss=42.764, backward_time=0.315, grad_norm=41.123, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.681e-04, train_time=1.307 +[gpub011:0/16] 2024-02-09 02:23:49,665 (trainer:762) INFO: 36epoch:train:5801-5900batch: iter_time=9.714e-05, forward_time=0.366, loss_ctc=52.464, loss_att=52.667, acc=0.758, loss=52.606, backward_time=0.356, grad_norm=47.733, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.681e-04, train_time=1.492 +[gpub011:0/16] 2024-02-09 02:25:59,996 (trainer:762) INFO: 36epoch:train:5901-6000batch: 
iter_time=9.393e-05, forward_time=0.291, loss_ctc=50.617, loss_att=47.233, acc=0.756, loss=48.248, backward_time=0.296, grad_norm=47.394, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.681e-04, train_time=1.302 +[gpub011:0/16] 2024-02-09 02:28:00,361 (trainer:762) INFO: 36epoch:train:6001-6100batch: iter_time=9.778e-05, forward_time=0.300, loss_ctc=48.771, loss_att=46.422, acc=0.757, loss=47.126, backward_time=0.305, grad_norm=42.977, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.681e-04, train_time=1.204 +[gpub011:0/16] 2024-02-09 02:30:26,124 (trainer:762) INFO: 36epoch:train:6101-6200batch: iter_time=1.033e-04, forward_time=0.295, loss_ctc=45.089, loss_att=43.606, acc=0.764, loss=44.051, backward_time=0.304, grad_norm=42.082, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.681e-04, train_time=1.457 +[gpub011:0/16] 2024-02-09 02:32:08,881 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub011:0/16] 2024-02-09 02:32:27,803 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 02:32:31,400 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 02:32:31,400 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-09 02:32:31,445 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 02:38:03,191 
(trainer:762) INFO: 36epoch:train:6201-6300batch: iter_time=3.123, forward_time=0.398, loss_ctc=46.611, loss_att=51.149, acc=0.738, loss=49.787, backward_time=0.328, grad_norm=47.752, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.681e-04, train_time=4.570 +[gpub011:0/16] 2024-02-09 02:40:24,300 (trainer:762) INFO: 36epoch:train:6301-6400batch: iter_time=7.356e-05, forward_time=0.292, loss_ctc=56.083, loss_att=52.301, acc=0.758, loss=53.436, backward_time=0.297, grad_norm=58.010, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.680e-04, train_time=1.412 +[gpub011:0/16] 2024-02-09 02:42:37,801 (trainer:762) INFO: 36epoch:train:6401-6500batch: iter_time=7.824e-05, forward_time=0.299, loss_ctc=54.068, loss_att=48.038, acc=0.761, loss=49.847, backward_time=0.316, grad_norm=39.138, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.680e-04, train_time=1.335 +[gpub011:0/16] 2024-02-09 02:45:02,071 (trainer:762) INFO: 36epoch:train:6501-6600batch: iter_time=7.890e-05, forward_time=0.372, loss_ctc=43.773, loss_att=39.718, acc=0.765, loss=40.935, backward_time=0.332, grad_norm=36.901, clip=100.000, loss_scale=6.179e+33, optim_step_time=0.096, optim0_lr0=1.680e-04, train_time=1.442 +[gpub011:0/16] 2024-02-09 02:47:16,630 (trainer:762) INFO: 36epoch:train:6601-6700batch: iter_time=8.212e-05, forward_time=0.296, loss_ctc=51.119, loss_att=58.545, acc=0.743, loss=56.317, backward_time=0.300, grad_norm=44.941, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.680e-04, train_time=1.346 +[gpub011:0/16] 2024-02-09 02:49:40,162 (trainer:762) INFO: 36epoch:train:6701-6800batch: iter_time=8.255e-05, forward_time=0.295, loss_ctc=45.344, loss_att=46.958, acc=0.779, loss=46.474, backward_time=0.309, grad_norm=40.406, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.680e-04, train_time=1.436 +[gpub011:0/16] 2024-02-09 02:50:03,568 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub011:0/16] 2024-02-09 02:51:37,313 (trainer:762) INFO: 36epoch:train:6801-6900batch: iter_time=8.243e-05, forward_time=0.292, loss_ctc=52.181, loss_att=52.838, acc=0.745, loss=52.641, backward_time=0.305, grad_norm=46.028, clip=100.000, loss_scale=6.084e+33, optim_step_time=0.093, optim0_lr0=1.680e-04, train_time=1.171 +[gpub011:0/16] 2024-02-09 02:54:10,879 (trainer:762) INFO: 36epoch:train:6901-7000batch: iter_time=8.233e-05, forward_time=0.372, loss_ctc=45.753, loss_att=42.516, acc=0.767, loss=43.487, backward_time=0.354, grad_norm=39.101, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.679e-04, train_time=1.534 +[gpub011:0/16] 2024-02-09 02:56:16,445 (trainer:762) INFO: 36epoch:train:7001-7100batch: iter_time=8.998e-05, forward_time=0.291, loss_ctc=47.515, loss_att=47.655, acc=0.760, loss=47.613, backward_time=0.298, grad_norm=43.300, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.679e-04, train_time=1.257 +[gpub011:0/16] 2024-02-09 02:58:26,437 (trainer:762) INFO: 36epoch:train:7101-7200batch: iter_time=3.471e-04, forward_time=0.307, loss_ctc=53.570, loss_att=50.378, acc=0.765, loss=51.335, backward_time=0.307, grad_norm=46.748, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.679e-04, train_time=1.300 +[gpub011:0/16] 2024-02-09 02:59:24,103 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 03:00:57,942 (trainer:762) INFO: 36epoch:train:7201-7300batch: iter_time=8.977e-05, forward_time=0.346, loss_ctc=47.366, loss_att=45.623, acc=0.759, loss=46.146, backward_time=0.364, grad_norm=48.513, clip=100.000, loss_scale=3.488e+33, optim_step_time=0.099, optim0_lr0=1.679e-04, train_time=1.515 +[gpub011:0/16] 2024-02-09 03:03:12,196 (trainer:762) INFO: 36epoch:train:7301-7400batch: iter_time=8.211e-05, forward_time=0.292, loss_ctc=46.213, loss_att=43.332, acc=0.756, loss=44.197, backward_time=0.298, grad_norm=37.922, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.679e-04, train_time=1.342 +[gpub011:0/16] 2024-02-09 03:05:06,826 (trainer:762) INFO: 36epoch:train:7401-7500batch: iter_time=8.766e-05, forward_time=0.293, loss_ctc=49.288, loss_att=51.321, acc=0.751, loss=50.711, backward_time=0.300, grad_norm=45.149, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.679e-04, train_time=1.147 +[gpub011:0/16] 2024-02-09 03:05:26,867 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-09 03:05:45,944 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 03:05:49,802 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 03:05:49,802 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-09 03:05:49,805 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 03:12:52,955 (trainer:762) INFO: 36epoch:train:7501-7600batch: iter_time=3.297, forward_time=0.397, loss_ctc=51.885, loss_att=52.806, acc=0.745, loss=52.530, backward_time=0.323, grad_norm=52.772, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.678e-04, train_time=4.661 +[gpub011:0/16] 2024-02-09 03:15:03,795 (trainer:762) INFO: 36epoch:train:7601-7700batch: iter_time=8.766e-05, forward_time=0.291, loss_ctc=47.691, loss_att=44.900, acc=0.765, loss=45.737, backward_time=0.297, grad_norm=40.373, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.678e-04, train_time=1.308 +[gpub011:0/16] 2024-02-09 03:17:14,734 (trainer:762) INFO: 36epoch:train:7701-7800batch: iter_time=9.259e-05, forward_time=0.291, loss_ctc=52.036, loss_att=42.755, acc=0.762, loss=45.539, backward_time=0.297, grad_norm=43.246, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.678e-04, train_time=1.308 +[gpub011:0/16] 2024-02-09 03:19:32,346 (trainer:762) 
INFO: 36epoch:train:7801-7900batch: iter_time=8.845e-05, forward_time=0.398, loss_ctc=46.887, loss_att=50.513, acc=0.757, loss=49.425, backward_time=0.322, grad_norm=41.730, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.678e-04, train_time=1.377 +[gpub011:0/16] 2024-02-09 03:21:52,659 (trainer:762) INFO: 36epoch:train:7901-8000batch: iter_time=8.540e-05, forward_time=0.318, loss_ctc=46.984, loss_att=49.547, acc=0.761, loss=48.779, backward_time=0.311, grad_norm=40.453, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.678e-04, train_time=1.403 +[gpub011:0/16] 2024-02-09 03:24:02,503 (trainer:762) INFO: 36epoch:train:8001-8100batch: iter_time=8.291e-05, forward_time=0.293, loss_ctc=47.303, loss_att=51.014, acc=0.761, loss=49.901, backward_time=0.297, grad_norm=40.787, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.678e-04, train_time=1.298 +[gpub011:0/16] 2024-02-09 03:26:04,026 (trainer:762) INFO: 36epoch:train:8101-8200batch: iter_time=8.477e-05, forward_time=0.293, loss_ctc=52.040, loss_att=47.945, acc=0.766, loss=49.173, backward_time=0.312, grad_norm=42.624, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.678e-04, train_time=1.214 +[gpub011:0/16] 2024-02-09 03:28:35,935 (trainer:762) INFO: 36epoch:train:8201-8300batch: iter_time=8.356e-05, forward_time=0.396, loss_ctc=44.276, loss_att=42.250, acc=0.764, loss=42.858, backward_time=0.314, grad_norm=42.029, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.677e-04, train_time=1.520 +[gpub011:0/16] 2024-02-09 03:30:41,338 (trainer:762) INFO: 36epoch:train:8301-8400batch: iter_time=8.035e-05, forward_time=0.314, loss_ctc=51.569, loss_att=52.570, acc=0.759, loss=52.270, backward_time=0.320, grad_norm=47.231, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.677e-04, train_time=1.254 +[gpub011:0/16] 2024-02-09 03:33:01,294 (trainer:762) INFO: 36epoch:train:8401-8500batch: 
iter_time=8.082e-05, forward_time=0.292, loss_ctc=49.700, loss_att=46.366, acc=0.759, loss=47.366, backward_time=0.297, grad_norm=45.794, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.677e-04, train_time=1.399 +[gpub011:0/16] 2024-02-09 03:35:33,653 (trainer:762) INFO: 36epoch:train:8501-8600batch: iter_time=9.179e-05, forward_time=0.349, loss_ctc=48.466, loss_att=45.751, acc=0.758, loss=46.566, backward_time=0.348, grad_norm=42.307, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.677e-04, train_time=1.523 +[gpub011:0/16] 2024-02-09 03:37:33,924 (trainer:762) INFO: 36epoch:train:8601-8700batch: iter_time=8.777e-05, forward_time=0.317, loss_ctc=44.471, loss_att=42.005, acc=0.768, loss=42.745, backward_time=0.306, grad_norm=40.442, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.677e-04, train_time=1.202 +[gpub011:0/16] 2024-02-09 03:39:05,003 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub011:0/16] 2024-02-09 03:39:24,741 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 03:39:28,700 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 03:39:28,700 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-09 03:39:28,703 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 03:45:08,061 
(trainer:762) INFO: 36epoch:train:8701-8800batch: iter_time=3.213, forward_time=0.326, loss_ctc=45.926, loss_att=52.251, acc=0.732, loss=50.354, backward_time=0.299, grad_norm=47.047, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.677e-04, train_time=4.541 +[gpub011:0/16] 2024-02-09 03:47:10,638 (trainer:762) INFO: 36epoch:train:8801-8900batch: iter_time=8.694e-05, forward_time=0.313, loss_ctc=55.615, loss_att=52.200, acc=0.745, loss=53.224, backward_time=0.329, grad_norm=54.329, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.676e-04, train_time=1.225 +[gpub011:0/16] 2024-02-09 03:49:24,999 (trainer:762) INFO: 36epoch:train:8901-9000batch: iter_time=8.772e-05, forward_time=0.309, loss_ctc=53.188, loss_att=47.498, acc=0.744, loss=49.205, backward_time=0.302, grad_norm=42.566, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.676e-04, train_time=1.344 +[gpub011:0/16] 2024-02-09 03:51:41,427 (trainer:762) INFO: 36epoch:train:9001-9100batch: iter_time=9.216e-05, forward_time=0.335, loss_ctc=44.067, loss_att=41.382, acc=0.748, loss=42.187, backward_time=0.296, grad_norm=39.271, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.676e-04, train_time=1.364 +[gpub011:0/16] 2024-02-09 03:53:39,954 (trainer:762) INFO: 36epoch:train:9101-9200batch: iter_time=8.061e-05, forward_time=0.325, loss_ctc=50.350, loss_att=58.482, acc=0.736, loss=56.042, backward_time=0.319, grad_norm=43.634, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.676e-04, train_time=1.185 +[gpub011:0/16] 2024-02-09 03:55:55,669 (trainer:762) INFO: 36epoch:train:9201-9300batch: iter_time=8.730e-05, forward_time=0.291, loss_ctc=45.359, loss_att=48.118, acc=0.767, loss=47.290, backward_time=0.298, grad_norm=39.181, clip=100.000, loss_scale=4.284e+33, optim_step_time=0.093, optim0_lr0=1.676e-04, train_time=1.357 +[gpub011:0/16] 2024-02-09 03:58:24,080 (trainer:762) INFO: 
36epoch:train:9301-9400batch: iter_time=8.834e-05, forward_time=0.335, loss_ctc=51.928, loss_att=51.821, acc=0.749, loss=51.853, backward_time=0.338, grad_norm=43.356, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.676e-04, train_time=1.484 +[gpub011:0/16] 2024-02-09 04:00:26,746 (trainer:762) INFO: 36epoch:train:9401-9500batch: iter_time=8.174e-05, forward_time=0.304, loss_ctc=45.134, loss_att=42.637, acc=0.764, loss=43.386, backward_time=0.298, grad_norm=38.708, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.675e-04, train_time=1.227 +[gpub011:0/16] 2024-02-09 04:02:38,744 (trainer:762) INFO: 36epoch:train:9501-9600batch: iter_time=8.406e-05, forward_time=0.316, loss_ctc=47.120, loss_att=47.974, acc=0.754, loss=47.718, backward_time=0.307, grad_norm=43.212, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.675e-04, train_time=1.319 +[gpub011:0/16] 2024-02-09 04:05:03,879 (trainer:762) INFO: 36epoch:train:9601-9700batch: iter_time=8.626e-05, forward_time=0.311, loss_ctc=52.899, loss_att=50.155, acc=0.758, loss=50.978, backward_time=0.310, grad_norm=46.515, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.675e-04, train_time=1.451 +[gpub011:0/16] 2024-02-09 04:07:24,887 (trainer:762) INFO: 36epoch:train:9701-9800batch: iter_time=8.251e-05, forward_time=0.317, loss_ctc=46.839, loss_att=45.652, acc=0.753, loss=46.008, backward_time=0.305, grad_norm=49.403, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.675e-04, train_time=1.411 +[gpub011:0/16] 2024-02-09 04:09:27,447 (trainer:762) INFO: 36epoch:train:9801-9900batch: iter_time=8.501e-05, forward_time=0.338, loss_ctc=46.158, loss_att=44.065, acc=0.739, loss=44.693, backward_time=0.310, grad_norm=41.158, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.675e-04, train_time=1.225 +[gpub011:0/16] 2024-02-09 04:11:41,343 (trainer:762) INFO: 36epoch:train:9901-10000batch: 
iter_time=8.025e-05, forward_time=0.309, loss_ctc=48.195, loss_att=50.695, acc=0.738, loss=49.945, backward_time=0.316, grad_norm=44.612, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.675e-04, train_time=1.339 +[gpub011:0/16] 2024-02-09 04:12:01,472 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub011:0/16] 2024-02-09 04:12:20,566 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 04:12:24,337 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 04:12:24,337 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-09 04:12:24,342 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 04:19:13,158 (trainer:762) INFO: 36epoch:train:10001-10100batch: iter_time=3.263, forward_time=0.341, loss_ctc=52.419, loss_att=53.766, acc=0.745, loss=53.362, backward_time=0.310, grad_norm=58.034, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.675e-04, train_time=4.518 +[gpub011:0/16] 2024-02-09 04:21:29,348 (trainer:762) INFO: 36epoch:train:10101-10200batch: iter_time=8.807e-05, forward_time=0.317, loss_ctc=48.184, loss_att=46.483, acc=0.762, loss=46.994, backward_time=0.299, grad_norm=43.901, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.674e-04, train_time=1.362 +[gpub011:0/16] 2024-02-09 04:23:51,313 
(trainer:762) INFO: 36epoch:train:10201-10300batch: iter_time=8.955e-05, forward_time=0.336, loss_ctc=52.263, loss_att=41.740, acc=0.769, loss=44.897, backward_time=0.323, grad_norm=43.353, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.674e-04, train_time=1.419 +[gpub011:0/16] 2024-02-09 04:25:59,413 (trainer:762) INFO: 36epoch:train:10301-10400batch: iter_time=8.629e-05, forward_time=0.297, loss_ctc=46.727, loss_att=50.958, acc=0.756, loss=49.689, backward_time=0.305, grad_norm=41.266, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.674e-04, train_time=1.281 +[gpub011:0/16] 2024-02-09 04:28:15,050 (trainer:762) INFO: 36epoch:train:10401-10500batch: iter_time=2.441e-04, forward_time=0.331, loss_ctc=47.026, loss_att=49.926, acc=0.759, loss=49.056, backward_time=0.307, grad_norm=41.092, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.674e-04, train_time=1.356 +[gpub011:0/16] 2024-02-09 04:30:39,218 (trainer:762) INFO: 36epoch:train:10501-10600batch: iter_time=8.732e-05, forward_time=0.359, loss_ctc=46.766, loss_att=50.186, acc=0.764, loss=49.160, backward_time=0.321, grad_norm=40.802, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.674e-04, train_time=1.441 +[gpub011:0/16] 2024-02-09 04:32:46,084 (trainer:762) INFO: 36epoch:train:10601-10700batch: iter_time=9.261e-05, forward_time=0.321, loss_ctc=52.749, loss_att=48.272, acc=0.767, loss=49.615, backward_time=0.331, grad_norm=43.978, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.674e-04, train_time=1.269 +[gpub011:0/16] 2024-02-09 04:35:01,180 (trainer:762) INFO: 36epoch:train:10701-10800batch: iter_time=9.316e-05, forward_time=0.294, loss_ctc=43.773, loss_att=41.692, acc=0.767, loss=42.317, backward_time=0.295, grad_norm=41.522, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.673e-04, train_time=1.350 +[gpub011:0/16] 2024-02-09 04:37:22,142 (trainer:762) INFO: 
36epoch:train:10801-10900batch: iter_time=2.150e-04, forward_time=0.351, loss_ctc=51.907, loss_att=52.620, acc=0.759, loss=52.406, backward_time=0.333, grad_norm=44.626, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.673e-04, train_time=1.409 +[gpub011:0/16] 2024-02-09 04:39:25,705 (trainer:762) INFO: 36epoch:train:10901-11000batch: iter_time=8.729e-05, forward_time=0.331, loss_ctc=49.757, loss_att=46.093, acc=0.759, loss=47.192, backward_time=0.316, grad_norm=45.785, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.673e-04, train_time=1.236 +[gpub011:0/16] 2024-02-09 04:41:48,903 (trainer:762) INFO: 36epoch:train:11001-11100batch: iter_time=8.552e-05, forward_time=0.327, loss_ctc=48.127, loss_att=46.086, acc=0.759, loss=46.698, backward_time=0.344, grad_norm=42.917, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.673e-04, train_time=1.431 +[gpub011:0/16] 2024-02-09 04:44:08,003 (trainer:762) INFO: 36epoch:train:11101-11200batch: iter_time=9.159e-05, forward_time=0.321, loss_ctc=44.298, loss_att=43.084, acc=0.765, loss=43.448, backward_time=0.329, grad_norm=39.507, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.673e-04, train_time=1.391 +[gpub011:0/16] 2024-02-09 04:45:27,957 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-09 04:45:47,327 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 04:45:51,258 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 04:45:51,258 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-09 04:45:51,261 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 04:51:42,164 (trainer:762) INFO: 36epoch:train:11201-11300batch: iter_time=3.273, forward_time=0.293, loss_ctc=45.631, loss_att=51.658, acc=0.734, loss=49.850, backward_time=0.301, grad_norm=47.757, clip=100.000, loss_scale=8.567e+33, optim_step_time=0.093, optim0_lr0=1.673e-04, train_time=4.541 +[gpub011:0/16] 2024-02-09 04:53:54,494 (trainer:762) INFO: 36epoch:train:11301-11400batch: iter_time=8.269e-05, forward_time=0.370, loss_ctc=55.000, loss_att=51.266, acc=0.748, loss=52.386, backward_time=0.318, grad_norm=56.400, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.673e-04, train_time=1.323 +[gpub011:0/16] 2024-02-09 04:56:08,736 (trainer:762) INFO: 36epoch:train:11401-11500batch: iter_time=8.389e-05, forward_time=0.329, loss_ctc=53.357, loss_att=46.836, acc=0.749, loss=48.792, backward_time=0.304, grad_norm=41.725, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.672e-04, train_time=1.342 +[gpub011:0/16] 2024-02-09 04:58:27,192 (trainer:762) 
INFO: 36epoch:train:11501-11600batch: iter_time=9.527e-05, forward_time=0.302, loss_ctc=43.676, loss_att=40.509, acc=0.752, loss=41.459, backward_time=0.303, grad_norm=38.104, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.672e-04, train_time=1.384 +[gpub011:0/16] 2024-02-09 05:00:36,735 (trainer:762) INFO: 36epoch:train:11601-11700batch: iter_time=8.950e-05, forward_time=0.317, loss_ctc=50.372, loss_att=58.780, acc=0.735, loss=56.258, backward_time=0.311, grad_norm=44.363, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.672e-04, train_time=1.295 +[gpub011:0/16] 2024-02-09 05:02:48,963 (trainer:762) INFO: 36epoch:train:11701-11800batch: iter_time=9.125e-05, forward_time=0.316, loss_ctc=44.820, loss_att=47.064, acc=0.770, loss=46.391, backward_time=0.313, grad_norm=39.584, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.672e-04, train_time=1.322 +[gpub011:0/16] 2024-02-09 05:05:13,045 (trainer:762) INFO: 36epoch:train:11801-11900batch: iter_time=8.679e-05, forward_time=0.314, loss_ctc=51.224, loss_att=51.263, acc=0.752, loss=51.252, backward_time=0.324, grad_norm=44.226, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.672e-04, train_time=1.441 +[gpub011:0/16] 2024-02-09 05:07:17,397 (trainer:762) INFO: 36epoch:train:11901-12000batch: iter_time=8.630e-05, forward_time=0.290, loss_ctc=45.416, loss_att=42.753, acc=0.762, loss=43.552, backward_time=0.297, grad_norm=40.050, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.672e-04, train_time=1.243 +[gpub011:0/16] 2024-02-09 05:09:26,033 (trainer:762) INFO: 36epoch:train:12001-12100batch: iter_time=8.810e-05, forward_time=0.327, loss_ctc=47.175, loss_att=47.583, acc=0.755, loss=47.461, backward_time=0.343, grad_norm=41.163, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.671e-04, train_time=1.286 +[gpub011:0/16] 2024-02-09 05:11:46,181 (trainer:762) INFO: 
36epoch:train:12101-12200batch: iter_time=8.916e-05, forward_time=0.334, loss_ctc=53.143, loss_att=50.182, acc=0.759, loss=51.070, backward_time=0.320, grad_norm=46.633, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.671e-04, train_time=1.401 +[gpub011:0/16] 2024-02-09 05:14:03,835 (trainer:762) INFO: 36epoch:train:12201-12300batch: iter_time=8.596e-05, forward_time=0.294, loss_ctc=47.262, loss_att=45.526, acc=0.753, loss=46.047, backward_time=0.295, grad_norm=47.404, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.671e-04, train_time=1.376 +[gpub011:0/16] 2024-02-09 05:16:05,583 (trainer:762) INFO: 36epoch:train:12301-12400batch: iter_time=8.472e-05, forward_time=0.313, loss_ctc=46.387, loss_att=44.246, acc=0.738, loss=44.888, backward_time=0.302, grad_norm=40.133, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.671e-04, train_time=1.217 +[gpub011:0/16] 2024-02-09 05:18:22,363 (trainer:762) INFO: 36epoch:train:12401-12500batch: iter_time=8.122e-05, forward_time=0.323, loss_ctc=47.720, loss_att=50.703, acc=0.738, loss=49.808, backward_time=0.300, grad_norm=43.746, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.671e-04, train_time=1.368 +[gpub011:0/16] 2024-02-09 05:18:42,409 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-09 05:19:01,509 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 05:19:05,354 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 05:19:05,354 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-09 05:19:05,358 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 05:25:23,666 (trainer:762) INFO: 36epoch:train:12501-12600batch: iter_time=2.966, forward_time=0.326, loss_ctc=52.291, loss_att=54.344, acc=0.743, loss=53.728, backward_time=0.308, grad_norm=56.640, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.671e-04, train_time=4.213 +[gpub011:0/16] 2024-02-09 05:27:43,572 (trainer:762) INFO: 36epoch:train:12601-12700batch: iter_time=8.554e-05, forward_time=0.315, loss_ctc=46.870, loss_att=45.301, acc=0.765, loss=45.772, backward_time=0.306, grad_norm=40.093, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.671e-04, train_time=1.399 +[gpub011:0/16] 2024-02-09 05:30:01,609 (trainer:762) INFO: 36epoch:train:12701-12800batch: iter_time=8.884e-05, forward_time=0.330, loss_ctc=51.709, loss_att=42.659, acc=0.764, loss=45.374, backward_time=0.300, grad_norm=41.783, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.670e-04, train_time=1.380 +[gpub011:0/16] 2024-02-09 05:32:05,385 (trainer:762) 
INFO: 36epoch:train:12801-12900batch: iter_time=8.385e-05, forward_time=0.320, loss_ctc=46.613, loss_att=50.559, acc=0.759, loss=49.375, backward_time=0.325, grad_norm=38.894, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.670e-04, train_time=1.237 +[gpub011:0/16] 2024-02-09 05:34:25,414 (trainer:762) INFO: 36epoch:train:12901-13000batch: iter_time=9.054e-05, forward_time=0.301, loss_ctc=46.199, loss_att=49.717, acc=0.762, loss=48.661, backward_time=0.309, grad_norm=39.229, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.670e-04, train_time=1.400 +[gpub011:0/16] 2024-02-09 05:36:44,301 (trainer:762) INFO: 36epoch:train:13001-13100batch: iter_time=8.888e-05, forward_time=0.351, loss_ctc=47.001, loss_att=50.354, acc=0.764, loss=49.348, backward_time=0.318, grad_norm=41.938, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.670e-04, train_time=1.389 +[gpub011:0/16] 2024-02-09 05:38:48,063 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 05:38:54,166 (trainer:762) INFO: 36epoch:train:13101-13200batch: iter_time=9.449e-05, forward_time=0.320, loss_ctc=52.217, loss_att=48.017, acc=0.767, loss=49.277, backward_time=0.328, grad_norm=45.318, clip=100.000, loss_scale=1.017e+34, optim_step_time=0.097, optim0_lr0=1.670e-04, train_time=1.299 +[gpub011:0/16] 2024-02-09 05:41:10,373 (trainer:762) INFO: 36epoch:train:13201-13300batch: iter_time=9.177e-05, forward_time=0.290, loss_ctc=43.597, loss_att=42.389, acc=0.765, loss=42.752, backward_time=0.295, grad_norm=42.737, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.670e-04, train_time=1.362 +[gpub011:0/16] 2024-02-09 05:43:29,887 (trainer:762) INFO: 36epoch:train:13301-13400batch: iter_time=9.321e-05, forward_time=0.363, loss_ctc=51.244, loss_att=52.087, acc=0.760, loss=51.834, backward_time=0.315, grad_norm=44.435, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.669e-04, train_time=1.395 +[gpub011:0/16] 2024-02-09 05:45:33,940 (trainer:762) INFO: 36epoch:train:13401-13500batch: iter_time=9.242e-05, forward_time=0.330, loss_ctc=49.988, loss_att=46.188, acc=0.759, loss=47.328, backward_time=0.313, grad_norm=46.063, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.669e-04, train_time=1.240 +[gpub011:0/16] 2024-02-09 05:48:11,466 (trainer:762) INFO: 36epoch:train:13501-13600batch: iter_time=9.796e-05, forward_time=0.303, loss_ctc=48.321, loss_att=46.018, acc=0.759, loss=46.709, backward_time=0.300, grad_norm=43.919, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.669e-04, train_time=1.575 +[gpub011:0/16] 2024-02-09 05:50:13,514 (trainer:762) INFO: 36epoch:train:13601-13700batch: iter_time=1.003e-04, forward_time=0.319, loss_ctc=44.229, loss_att=42.363, acc=0.771, loss=42.923, backward_time=0.304, grad_norm=39.959, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.669e-04, train_time=1.220 +[gpub011:0/16] 2024-02-09 
05:51:40,507 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub011:0/16] 2024-02-09 05:51:59,910 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 05:52:03,493 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 05:52:03,493 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-09 05:52:03,524 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 05:57:30,418 (trainer:762) INFO: 36epoch:train:13701-13800batch: iter_time=3.052, forward_time=0.363, loss_ctc=45.686, loss_att=51.870, acc=0.732, loss=50.015, backward_time=0.327, grad_norm=46.847, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.669e-04, train_time=4.369 +[gpub011:0/16] 2024-02-09 05:59:31,713 (trainer:762) INFO: 36epoch:train:13801-13900batch: iter_time=8.158e-05, forward_time=0.311, loss_ctc=54.688, loss_att=51.012, acc=0.747, loss=52.115, backward_time=0.308, grad_norm=58.176, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.669e-04, train_time=1.213 +[gpub011:0/16] 2024-02-09 06:01:54,741 (trainer:762) INFO: 36epoch:train:13901-14000batch: iter_time=3.219e-04, forward_time=0.354, loss_ctc=53.026, loss_att=47.116, acc=0.746, loss=48.889, backward_time=0.335, grad_norm=43.871, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, 
optim0_lr0=1.668e-04, train_time=1.430 +[gpub011:0/16] 2024-02-09 06:04:19,479 (trainer:762) INFO: 36epoch:train:14001-14100batch: iter_time=8.515e-05, forward_time=0.299, loss_ctc=43.135, loss_att=40.486, acc=0.753, loss=41.280, backward_time=0.306, grad_norm=39.223, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.668e-04, train_time=1.447 +[gpub011:0/16] 2024-02-09 06:06:38,699 (trainer:762) INFO: 36epoch:train:14101-14200batch: iter_time=8.543e-05, forward_time=0.379, loss_ctc=49.990, loss_att=58.122, acc=0.739, loss=55.682, backward_time=0.329, grad_norm=43.597, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.668e-04, train_time=1.392 +[gpub011:0/16] 2024-02-09 06:08:49,549 (trainer:762) INFO: 36epoch:train:14201-14300batch: iter_time=8.697e-05, forward_time=0.304, loss_ctc=44.803, loss_att=47.061, acc=0.768, loss=46.383, backward_time=0.303, grad_norm=40.609, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.668e-04, train_time=1.309 +[gpub011:0/16] 2024-02-09 06:11:30,318 (trainer:762) INFO: 36epoch:train:14301-14400batch: iter_time=8.470e-05, forward_time=0.388, loss_ctc=51.727, loss_att=51.575, acc=0.750, loss=51.621, backward_time=0.322, grad_norm=44.126, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.668e-04, train_time=1.607 +[gpub011:0/16] 2024-02-09 06:13:29,682 (trainer:762) INFO: 36epoch:train:14401-14500batch: iter_time=8.768e-05, forward_time=0.294, loss_ctc=44.924, loss_att=42.385, acc=0.763, loss=43.147, backward_time=0.300, grad_norm=41.288, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.668e-04, train_time=1.193 +[gpub011:0/16] 2024-02-09 06:16:08,360 (trainer:762) INFO: 36epoch:train:14501-14600batch: iter_time=8.404e-05, forward_time=0.341, loss_ctc=46.910, loss_att=47.860, acc=0.755, loss=47.575, backward_time=0.308, grad_norm=42.065, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.668e-04, 
train_time=1.587 +[gpub011:0/16] 2024-02-09 06:18:16,135 (trainer:762) INFO: 36epoch:train:14601-14700batch: iter_time=8.577e-05, forward_time=0.338, loss_ctc=52.500, loss_att=49.895, acc=0.760, loss=50.677, backward_time=0.329, grad_norm=44.041, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.667e-04, train_time=1.277 +[gpub011:0/16] 2024-02-09 06:20:42,104 (trainer:762) INFO: 36epoch:train:14701-14800batch: iter_time=8.849e-05, forward_time=0.313, loss_ctc=46.079, loss_att=44.751, acc=0.756, loss=45.149, backward_time=0.300, grad_norm=45.612, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.667e-04, train_time=1.460 +[gpub011:0/16] 2024-02-09 06:23:12,636 (trainer:762) INFO: 36epoch:train:14801-14900batch: iter_time=0.001, forward_time=0.366, loss_ctc=46.137, loss_att=43.992, acc=0.739, loss=44.636, backward_time=0.361, grad_norm=40.164, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.667e-04, train_time=1.504 +[gpub011:0/16] 2024-02-09 06:25:31,116 (trainer:762) INFO: 36epoch:train:14901-15000batch: iter_time=8.137e-05, forward_time=0.306, loss_ctc=47.736, loss_att=50.310, acc=0.740, loss=49.538, backward_time=0.308, grad_norm=44.751, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.667e-04, train_time=1.385 +[gpub011:0/16] 2024-02-09 07:03:27,940 (trainer:361) INFO: 36epoch results: [train] iter_time=0.259, forward_time=0.323, loss_ctc=49.145, loss_att=48.232, acc=0.753, loss=48.506, backward_time=0.311, grad_norm=44.299, clip=100.000, loss_scale=5.565e+33, optim_step_time=0.095, optim0_lr0=1.679e-04, train_time=1.603, time=6 hours, 41 minutes and 11.18 seconds, total_count=570000, gpu_max_cached_mem_GB=43.805, [valid] loss_ctc=35.002, cer_ctc=0.183, loss_att=38.491, acc=0.681, cer=0.336, wer=0.995, loss=37.444, time=37 minutes and 32.31 seconds, total_count=177498, gpu_max_cached_mem_GB=43.805 +[gpub011:0/16] 2024-02-09 07:03:36,946 (trainer:416) INFO: The best model 
has been updated: valid.total_count +[gpub011:0/16] 2024-02-09 07:03:37,023 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/25epoch.pth +[gpub011:0/16] 2024-02-09 07:03:37,023 (trainer:290) INFO: 37/45epoch started. Estimated time to finish: 2 days, 18 hours and 42 minutes +[gpub011:0/16] 2024-02-09 07:03:37,033 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-09 07:03:55,400 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 07:03:58,782 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 07:03:58,783 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-09 07:03:58,786 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 07:10:52,677 (trainer:762) INFO: 37epoch:train:1-100batch: iter_time=3.182, forward_time=0.373, loss_ctc=49.240, loss_att=44.451, acc=0.751, loss=45.888, backward_time=0.304, grad_norm=45.720, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.667e-04, train_time=4.356 +[gpub011:0/16] 2024-02-09 07:12:43,997 (trainer:762) INFO: 37epoch:train:101-200batch: iter_time=9.024e-05, forward_time=0.291, loss_ctc=48.026, loss_att=41.576, acc=0.762, loss=43.511, backward_time=0.303, grad_norm=42.030, clip=100.000, 
loss_scale=5.400e+33, optim_step_time=0.093, optim0_lr0=1.667e-04, train_time=1.113 +[gpub011:0/16] 2024-02-09 07:15:16,563 (trainer:762) INFO: 37epoch:train:201-300batch: iter_time=5.546e-04, forward_time=0.368, loss_ctc=49.152, loss_att=50.510, acc=0.754, loss=50.103, backward_time=0.349, grad_norm=44.317, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.666e-04, train_time=1.525 +[gpub011:0/16] 2024-02-09 07:17:13,376 (trainer:762) INFO: 37epoch:train:301-400batch: iter_time=9.469e-05, forward_time=0.301, loss_ctc=53.910, loss_att=45.743, acc=0.754, loss=48.193, backward_time=0.298, grad_norm=50.137, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.666e-04, train_time=1.168 +[gpub011:0/16] 2024-02-09 07:19:22,635 (trainer:762) INFO: 37epoch:train:401-500batch: iter_time=1.005e-04, forward_time=0.294, loss_ctc=53.586, loss_att=53.870, acc=0.737, loss=53.785, backward_time=0.301, grad_norm=49.730, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.666e-04, train_time=1.292 +[gpub011:0/16] 2024-02-09 07:21:46,504 (trainer:762) INFO: 37epoch:train:501-600batch: iter_time=9.318e-05, forward_time=0.307, loss_ctc=48.007, loss_att=50.247, acc=0.740, loss=49.575, backward_time=0.323, grad_norm=43.915, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.666e-04, train_time=1.438 +[gpub011:0/16] 2024-02-09 07:23:50,747 (trainer:762) INFO: 37epoch:train:601-700batch: iter_time=1.024e-04, forward_time=0.331, loss_ctc=49.536, loss_att=48.507, acc=0.746, loss=48.816, backward_time=0.329, grad_norm=43.764, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.666e-04, train_time=1.242 +[gpub011:0/16] 2024-02-09 07:25:50,585 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 07:26:03,819 (trainer:762) INFO: 37epoch:train:701-800batch: iter_time=9.640e-05, forward_time=0.291, loss_ctc=42.396, loss_att=35.373, acc=0.777, loss=37.480, backward_time=0.300, grad_norm=37.520, clip=100.000, loss_scale=9.755e+33, optim_step_time=0.093, optim0_lr0=1.666e-04, train_time=1.331 +[gpub011:0/16] 2024-02-09 07:28:12,615 (trainer:762) INFO: 37epoch:train:801-900batch: iter_time=9.698e-05, forward_time=0.312, loss_ctc=45.694, loss_att=41.858, acc=0.747, loss=43.009, backward_time=0.305, grad_norm=42.471, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.666e-04, train_time=1.288 +[gpub011:0/16] 2024-02-09 07:30:27,141 (trainer:762) INFO: 37epoch:train:901-1000batch: iter_time=9.338e-05, forward_time=0.337, loss_ctc=46.603, loss_att=50.184, acc=0.745, loss=49.110, backward_time=0.332, grad_norm=45.108, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.665e-04, train_time=1.345 +[gpub011:0/16] 2024-02-09 07:32:39,675 (trainer:762) INFO: 37epoch:train:1001-1100batch: iter_time=9.314e-05, forward_time=0.295, loss_ctc=49.048, loss_att=44.901, acc=0.760, loss=46.145, backward_time=0.304, grad_norm=40.738, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.665e-04, train_time=1.325 +[gpub011:0/16] 2024-02-09 07:34:54,548 (trainer:762) INFO: 37epoch:train:1101-1200batch: iter_time=3.494e-04, forward_time=0.370, loss_ctc=43.149, loss_att=39.572, acc=0.755, loss=40.645, backward_time=0.316, grad_norm=38.232, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.665e-04, train_time=1.349 +[gpub011:0/16] 2024-02-09 07:36:11,998 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-09 07:36:31,151 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 07:36:34,659 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 07:36:34,659 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-09 07:36:34,682 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 07:42:18,391 (trainer:762) INFO: 37epoch:train:1201-1300batch: iter_time=3.149, forward_time=0.290, loss_ctc=45.962, loss_att=43.875, acc=0.762, loss=44.501, backward_time=0.297, grad_norm=43.657, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.665e-04, train_time=4.438 +[gpub011:0/16] 2024-02-09 07:44:13,572 (trainer:762) INFO: 37epoch:train:1301-1400batch: iter_time=7.391e-05, forward_time=0.290, loss_ctc=48.063, loss_att=42.476, acc=0.757, loss=44.152, backward_time=0.296, grad_norm=42.490, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.665e-04, train_time=1.151 +[gpub011:0/16] 2024-02-09 07:46:16,980 (trainer:762) INFO: 37epoch:train:1401-1500batch: iter_time=8.344e-05, forward_time=0.331, loss_ctc=48.128, loss_att=51.115, acc=0.762, loss=50.219, backward_time=0.324, grad_norm=42.141, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.665e-04, train_time=1.234 +[gpub011:0/16] 2024-02-09 07:48:35,199 (trainer:762) INFO: 
37epoch:train:1501-1600batch: iter_time=8.844e-05, forward_time=0.359, loss_ctc=49.102, loss_att=44.942, acc=0.775, loss=46.190, backward_time=0.311, grad_norm=49.347, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.664e-04, train_time=1.382 +[gpub011:0/16] 2024-02-09 07:50:50,886 (trainer:762) INFO: 37epoch:train:1601-1700batch: iter_time=9.246e-05, forward_time=0.292, loss_ctc=54.404, loss_att=51.382, acc=0.756, loss=52.289, backward_time=0.296, grad_norm=49.812, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.664e-04, train_time=1.357 +[gpub011:0/16] 2024-02-09 07:52:43,018 (trainer:762) INFO: 37epoch:train:1701-1800batch: iter_time=8.910e-05, forward_time=0.296, loss_ctc=51.089, loss_att=51.336, acc=0.754, loss=51.262, backward_time=0.308, grad_norm=43.658, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.664e-04, train_time=1.121 +[gpub011:0/16] 2024-02-09 07:54:43,744 (trainer:762) INFO: 37epoch:train:1801-1900batch: iter_time=9.080e-05, forward_time=0.318, loss_ctc=46.729, loss_att=46.924, acc=0.748, loss=46.865, backward_time=0.320, grad_norm=46.501, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.664e-04, train_time=1.207 +[gpub011:0/16] 2024-02-09 07:56:06,995 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 07:57:19,551 (trainer:762) INFO: 37epoch:train:1901-2000batch: iter_time=9.699e-05, forward_time=0.349, loss_ctc=46.581, loss_att=44.763, acc=0.774, loss=45.308, backward_time=0.319, grad_norm=44.853, clip=100.000, loss_scale=3.540e+33, optim_step_time=0.095, optim0_lr0=1.664e-04, train_time=1.558 +[gpub011:0/16] 2024-02-09 07:59:10,133 (trainer:762) INFO: 37epoch:train:2001-2100batch: iter_time=8.307e-05, forward_time=0.293, loss_ctc=47.877, loss_att=41.666, acc=0.767, loss=43.529, backward_time=0.298, grad_norm=40.406, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.664e-04, train_time=1.106 +[gpub011:0/16] 2024-02-09 08:01:12,388 (trainer:762) INFO: 37epoch:train:2101-2200batch: iter_time=8.757e-05, forward_time=0.293, loss_ctc=40.650, loss_att=38.848, acc=0.768, loss=39.389, backward_time=0.300, grad_norm=36.379, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.664e-04, train_time=1.222 +[gpub011:0/16] 2024-02-09 08:03:59,031 (trainer:762) INFO: 37epoch:train:2201-2300batch: iter_time=9.228e-05, forward_time=0.366, loss_ctc=51.328, loss_att=52.678, acc=0.761, loss=52.273, backward_time=0.334, grad_norm=46.632, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.663e-04, train_time=1.666 +[gpub011:0/16] 2024-02-09 08:05:49,384 (trainer:762) INFO: 37epoch:train:2301-2400batch: iter_time=8.798e-05, forward_time=0.304, loss_ctc=39.110, loss_att=36.333, acc=0.779, loss=37.166, backward_time=0.297, grad_norm=33.691, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.663e-04, train_time=1.103 +[gpub011:0/16] 2024-02-09 08:07:56,908 (trainer:762) INFO: 37epoch:train:2401-2500batch: iter_time=8.799e-05, forward_time=0.291, loss_ctc=47.603, loss_att=43.863, acc=0.751, loss=44.985, backward_time=0.295, grad_norm=43.857, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.663e-04, train_time=1.275 +[gpub011:0/16] 2024-02-09 08:08:16,936 
(multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub011:0/16] 2024-02-09 08:08:35,897 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 08:08:39,408 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 08:08:39,408 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-09 08:08:39,411 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 08:14:48,136 (trainer:762) INFO: 37epoch:train:2501-2600batch: iter_time=2.848, forward_time=0.305, loss_ctc=47.899, loss_att=42.322, acc=0.772, loss=43.995, backward_time=0.324, grad_norm=39.981, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.663e-04, train_time=4.112 +[gpub011:0/16] 2024-02-09 08:16:59,975 (trainer:762) INFO: 37epoch:train:2601-2700batch: iter_time=4.871e-04, forward_time=0.344, loss_ctc=47.882, loss_att=41.495, acc=0.768, loss=43.411, backward_time=0.300, grad_norm=43.408, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=1.663e-04, train_time=1.318 +[gpub011:0/16] 2024-02-09 08:19:18,916 (trainer:762) INFO: 37epoch:train:2701-2800batch: iter_time=8.385e-05, forward_time=0.299, loss_ctc=48.515, loss_att=50.930, acc=0.767, loss=50.205, backward_time=0.304, grad_norm=40.990, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.663e-04, 
train_time=1.389 +[gpub011:0/16] 2024-02-09 08:21:29,893 (trainer:762) INFO: 37epoch:train:2801-2900batch: iter_time=8.873e-05, forward_time=0.289, loss_ctc=50.937, loss_att=44.596, acc=0.772, loss=46.498, backward_time=0.296, grad_norm=47.731, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.662e-04, train_time=1.310 +[gpub011:0/16] 2024-02-09 08:23:33,756 (trainer:762) INFO: 37epoch:train:2901-3000batch: iter_time=8.553e-05, forward_time=0.323, loss_ctc=51.448, loss_att=53.639, acc=0.746, loss=52.982, backward_time=0.310, grad_norm=46.245, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.662e-04, train_time=1.238 +[gpub011:0/16] 2024-02-09 08:25:54,735 (trainer:762) INFO: 37epoch:train:3001-3100batch: iter_time=8.626e-05, forward_time=0.377, loss_ctc=47.834, loss_att=49.110, acc=0.765, loss=48.727, backward_time=0.303, grad_norm=42.579, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.662e-04, train_time=1.410 +[gpub011:0/16] 2024-02-09 08:28:12,904 (trainer:762) INFO: 37epoch:train:3101-3200batch: iter_time=8.923e-05, forward_time=0.297, loss_ctc=48.583, loss_att=49.053, acc=0.752, loss=48.912, backward_time=0.300, grad_norm=44.127, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.662e-04, train_time=1.381 +[gpub011:0/16] 2024-02-09 08:30:05,553 (trainer:762) INFO: 37epoch:train:3201-3300batch: iter_time=8.191e-05, forward_time=0.308, loss_ctc=41.860, loss_att=35.054, acc=0.787, loss=37.096, backward_time=0.295, grad_norm=36.314, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.662e-04, train_time=1.126 +[gpub011:0/16] 2024-02-09 08:32:13,081 (trainer:762) INFO: 37epoch:train:3301-3400batch: iter_time=8.393e-05, forward_time=0.305, loss_ctc=45.233, loss_att=40.887, acc=0.761, loss=42.191, backward_time=0.308, grad_norm=43.258, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.662e-04, train_time=1.275 +[gpub011:0/16] 2024-02-09 
08:34:36,304 (trainer:762) INFO: 37epoch:train:3401-3500batch: iter_time=2.265e-04, forward_time=0.376, loss_ctc=45.339, loss_att=48.842, acc=0.756, loss=47.791, backward_time=0.319, grad_norm=44.118, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.662e-04, train_time=1.432 +[gpub011:0/16] 2024-02-09 08:36:40,057 (trainer:762) INFO: 37epoch:train:3501-3600batch: iter_time=8.816e-05, forward_time=0.293, loss_ctc=48.798, loss_att=45.773, acc=0.769, loss=46.680, backward_time=0.300, grad_norm=40.502, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.661e-04, train_time=1.237 +[gpub011:0/16] 2024-02-09 08:38:47,997 (trainer:762) INFO: 37epoch:train:3601-3700batch: iter_time=8.998e-05, forward_time=0.291, loss_ctc=41.912, loss_att=38.253, acc=0.767, loss=39.350, backward_time=0.298, grad_norm=37.584, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.661e-04, train_time=1.279 +[gpub011:0/16] 2024-02-09 08:40:13,188 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub011:0/16] 2024-02-09 08:40:32,857 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 08:40:36,448 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 08:40:36,448 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-09 08:40:36,451 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 08:46:37,623 (trainer:762) INFO: 37epoch:train:3701-3800batch: iter_time=3.408, forward_time=0.362, loss_ctc=46.238, loss_att=44.690, acc=0.758, loss=45.154, backward_time=0.304, grad_norm=44.636, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.661e-04, train_time=4.696 +[gpub011:0/16] 2024-02-09 08:48:39,367 (trainer:762) INFO: 37epoch:train:3801-3900batch: iter_time=7.433e-05, forward_time=0.290, loss_ctc=47.497, loss_att=42.335, acc=0.752, loss=43.884, backward_time=0.295, grad_norm=41.922, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.661e-04, train_time=1.217 +[gpub011:0/16] 2024-02-09 08:50:43,155 (trainer:762) INFO: 37epoch:train:3901-4000batch: iter_time=2.623e-04, forward_time=0.298, loss_ctc=47.848, loss_att=51.147, acc=0.753, loss=50.157, backward_time=0.301, grad_norm=42.739, clip=100.000, loss_scale=4.232e+33, optim_step_time=0.095, optim0_lr0=1.661e-04, train_time=1.238 +[gpub011:0/16] 2024-02-09 08:53:00,716 (trainer:762) INFO: 
37epoch:train:4001-4100batch: iter_time=8.222e-05, forward_time=0.351, loss_ctc=48.617, loss_att=45.903, acc=0.767, loss=46.717, backward_time=0.343, grad_norm=47.030, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.661e-04, train_time=1.375 +[gpub011:0/16] 2024-02-09 08:54:59,169 (trainer:762) INFO: 37epoch:train:4101-4200batch: iter_time=7.906e-05, forward_time=0.316, loss_ctc=52.302, loss_att=50.583, acc=0.753, loss=51.099, backward_time=0.298, grad_norm=47.179, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.660e-04, train_time=1.184 +[gpub011:0/16] 2024-02-09 08:57:12,739 (trainer:762) INFO: 37epoch:train:4201-4300batch: iter_time=8.015e-05, forward_time=0.295, loss_ctc=50.606, loss_att=50.637, acc=0.748, loss=50.628, backward_time=0.301, grad_norm=43.093, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.660e-04, train_time=1.336 +[gpub011:0/16] 2024-02-09 08:59:18,242 (trainer:762) INFO: 37epoch:train:4301-4400batch: iter_time=8.162e-05, forward_time=0.289, loss_ctc=45.787, loss_att=47.262, acc=0.735, loss=46.820, backward_time=0.297, grad_norm=46.972, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.660e-04, train_time=1.255 +[gpub011:0/16] 2024-02-09 09:01:24,698 (trainer:762) INFO: 37epoch:train:4401-4500batch: iter_time=8.983e-05, forward_time=0.387, loss_ctc=46.260, loss_att=44.020, acc=0.770, loss=44.692, backward_time=0.324, grad_norm=40.403, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.660e-04, train_time=1.264 +[gpub011:0/16] 2024-02-09 09:03:39,990 (trainer:762) INFO: 37epoch:train:4501-4600batch: iter_time=8.415e-05, forward_time=0.290, loss_ctc=47.757, loss_att=41.761, acc=0.760, loss=43.560, backward_time=0.296, grad_norm=43.043, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.660e-04, train_time=1.353 +[gpub011:0/16] 2024-02-09 09:05:44,416 (trainer:762) INFO: 37epoch:train:4601-4700batch: 
iter_time=8.799e-05, forward_time=0.292, loss_ctc=39.955, loss_att=38.910, acc=0.768, loss=39.223, backward_time=0.300, grad_norm=34.519, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.660e-04, train_time=1.244 +[gpub011:0/16] 2024-02-09 09:07:41,707 (trainer:762) INFO: 37epoch:train:4701-4800batch: iter_time=8.152e-05, forward_time=0.334, loss_ctc=50.857, loss_att=52.321, acc=0.757, loss=51.882, backward_time=0.328, grad_norm=47.552, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.660e-04, train_time=1.172 +[gpub011:0/16] 2024-02-09 09:09:58,196 (trainer:762) INFO: 37epoch:train:4801-4900batch: iter_time=8.107e-05, forward_time=0.331, loss_ctc=38.906, loss_att=35.950, acc=0.774, loss=36.837, backward_time=0.305, grad_norm=33.101, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.659e-04, train_time=1.365 +[gpub011:0/16] 2024-02-09 09:12:02,526 (trainer:762) INFO: 37epoch:train:4901-5000batch: iter_time=7.681e-05, forward_time=0.295, loss_ctc=46.627, loss_att=44.501, acc=0.740, loss=45.139, backward_time=0.296, grad_norm=42.112, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.659e-04, train_time=1.243 +[gpub011:0/16] 2024-02-09 09:12:22,670 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub011:0/16] 2024-02-09 09:12:42,028 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 09:12:45,504 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 09:12:45,504 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-09 09:12:45,575 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 09:19:30,726 (trainer:762) INFO: 37epoch:train:5001-5100batch: iter_time=3.342, forward_time=0.301, loss_ctc=47.548, loss_att=42.812, acc=0.759, loss=44.233, backward_time=0.299, grad_norm=39.234, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.659e-04, train_time=4.482 +[gpub011:0/16] 2024-02-09 09:22:05,063 (trainer:762) INFO: 37epoch:train:5101-5200batch: iter_time=8.260e-05, forward_time=0.383, loss_ctc=47.276, loss_att=41.139, acc=0.765, loss=42.980, backward_time=0.314, grad_norm=40.498, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.659e-04, train_time=1.542 +[gpub011:0/16] 2024-02-09 09:23:58,742 (trainer:762) INFO: 37epoch:train:5201-5300batch: iter_time=9.032e-05, forward_time=0.292, loss_ctc=47.799, loss_att=50.015, acc=0.758, loss=49.350, backward_time=0.299, grad_norm=41.021, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.659e-04, train_time=1.137 +[gpub011:0/16] 2024-02-09 09:25:59,674 (trainer:762) INFO: 
37epoch:train:5301-5400batch: iter_time=8.897e-05, forward_time=0.293, loss_ctc=49.709, loss_att=45.538, acc=0.759, loss=46.789, backward_time=0.300, grad_norm=47.195, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.659e-04, train_time=1.209 +[gpub011:0/16] 2024-02-09 09:28:29,568 (trainer:762) INFO: 37epoch:train:5401-5500batch: iter_time=8.849e-05, forward_time=0.301, loss_ctc=51.331, loss_att=52.028, acc=0.747, loss=51.819, backward_time=0.307, grad_norm=46.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.659e-04, train_time=1.499 +[gpub011:0/16] 2024-02-09 09:30:43,929 (trainer:762) INFO: 37epoch:train:5501-5600batch: iter_time=8.700e-05, forward_time=0.357, loss_ctc=47.467, loss_att=50.005, acc=0.746, loss=49.243, backward_time=0.318, grad_norm=43.395, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.658e-04, train_time=1.343 +[gpub011:0/16] 2024-02-09 09:32:35,948 (trainer:762) INFO: 37epoch:train:5601-5700batch: iter_time=8.844e-05, forward_time=0.291, loss_ctc=47.223, loss_att=47.346, acc=0.752, loss=47.309, backward_time=0.298, grad_norm=39.611, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.658e-04, train_time=1.120 +[gpub011:0/16] 2024-02-09 09:34:38,269 (trainer:762) INFO: 37epoch:train:5701-5800batch: iter_time=9.635e-05, forward_time=0.296, loss_ctc=41.771, loss_att=35.045, acc=0.782, loss=37.062, backward_time=0.299, grad_norm=35.599, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.658e-04, train_time=1.223 +[gpub011:0/16] 2024-02-09 09:37:01,703 (trainer:762) INFO: 37epoch:train:5801-5900batch: iter_time=9.482e-05, forward_time=0.373, loss_ctc=45.009, loss_att=41.587, acc=0.753, loss=42.614, backward_time=0.322, grad_norm=41.041, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.658e-04, train_time=1.435 +[gpub011:0/16] 2024-02-09 09:38:58,795 (trainer:762) INFO: 37epoch:train:5901-6000batch: 
iter_time=9.213e-05, forward_time=0.291, loss_ctc=45.005, loss_att=48.948, acc=0.750, loss=47.765, backward_time=0.298, grad_norm=43.709, clip=100.000, loss_scale=8.463e+33, optim_step_time=0.093, optim0_lr0=1.658e-04, train_time=1.170 +[gpub011:0/16] 2024-02-09 09:41:12,097 (trainer:762) INFO: 37epoch:train:6001-6100batch: iter_time=9.747e-05, forward_time=0.296, loss_ctc=48.105, loss_att=44.628, acc=0.765, loss=45.671, backward_time=0.300, grad_norm=42.238, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.658e-04, train_time=1.333 +[gpub011:0/16] 2024-02-09 09:43:17,490 (trainer:762) INFO: 37epoch:train:6101-6200batch: iter_time=1.052e-04, forward_time=0.290, loss_ctc=41.789, loss_att=38.982, acc=0.758, loss=39.824, backward_time=0.295, grad_norm=36.072, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.657e-04, train_time=1.254 +[gpub011:0/16] 2024-02-09 09:44:40,115 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub011:0/16] 2024-02-09 09:44:59,498 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 09:45:03,000 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 09:45:03,000 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-09 09:45:03,003 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 09:50:26,530 
(trainer:762) INFO: 37epoch:train:6201-6300batch: iter_time=3.039, forward_time=0.365, loss_ctc=45.550, loss_att=41.465, acc=0.763, loss=42.691, backward_time=0.329, grad_norm=41.350, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.657e-04, train_time=4.290 +[gpub011:0/16] 2024-02-09 09:52:28,710 (trainer:762) INFO: 37epoch:train:6301-6400batch: iter_time=8.986e-05, forward_time=0.294, loss_ctc=47.084, loss_att=40.133, acc=0.757, loss=42.218, backward_time=0.298, grad_norm=42.658, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.657e-04, train_time=1.221 +[gpub011:0/16] 2024-02-09 09:54:42,262 (trainer:762) INFO: 37epoch:train:6401-6500batch: iter_time=9.318e-05, forward_time=0.299, loss_ctc=47.706, loss_att=49.761, acc=0.757, loss=49.144, backward_time=0.299, grad_norm=41.760, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.657e-04, train_time=1.336 +[gpub011:0/16] 2024-02-09 09:56:45,604 (trainer:762) INFO: 37epoch:train:6501-6600batch: iter_time=8.557e-05, forward_time=0.350, loss_ctc=48.253, loss_att=44.839, acc=0.769, loss=45.863, backward_time=0.338, grad_norm=46.096, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.657e-04, train_time=1.233 +[gpub011:0/16] 2024-02-09 09:58:54,167 (trainer:762) INFO: 37epoch:train:6601-6700batch: iter_time=9.278e-05, forward_time=0.295, loss_ctc=52.598, loss_att=50.330, acc=0.755, loss=51.010, backward_time=0.301, grad_norm=45.663, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.657e-04, train_time=1.286 +[gpub011:0/16] 2024-02-09 10:00:08,661 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 10:01:11,788 (trainer:762) INFO: 37epoch:train:6701-6800batch: iter_time=9.487e-05, forward_time=0.293, loss_ctc=50.438, loss_att=50.609, acc=0.749, loss=50.558, backward_time=0.301, grad_norm=42.429, clip=100.000, loss_scale=7.448e+33, optim_step_time=0.093, optim0_lr0=1.657e-04, train_time=1.376 +[gpub011:0/16] 2024-02-09 10:03:23,507 (trainer:762) INFO: 37epoch:train:6801-6900batch: iter_time=9.627e-05, forward_time=0.311, loss_ctc=45.156, loss_att=46.958, acc=0.735, loss=46.417, backward_time=0.326, grad_norm=43.041, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.656e-04, train_time=1.317 +[gpub011:0/16] 2024-02-09 10:05:36,237 (trainer:762) INFO: 37epoch:train:6901-7000batch: iter_time=8.674e-05, forward_time=0.323, loss_ctc=45.892, loss_att=43.419, acc=0.774, loss=44.161, backward_time=0.318, grad_norm=38.508, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.656e-04, train_time=1.327 +[gpub011:0/16] 2024-02-09 10:07:43,393 (trainer:762) INFO: 37epoch:train:7001-7100batch: iter_time=8.950e-05, forward_time=0.292, loss_ctc=47.104, loss_att=41.417, acc=0.760, loss=43.123, backward_time=0.300, grad_norm=41.808, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.656e-04, train_time=1.272 +[gpub011:0/16] 2024-02-09 10:09:47,045 (trainer:762) INFO: 37epoch:train:7101-7200batch: iter_time=9.048e-05, forward_time=0.291, loss_ctc=39.432, loss_att=38.531, acc=0.771, loss=38.802, backward_time=0.297, grad_norm=35.997, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.656e-04, train_time=1.236 +[gpub011:0/16] 2024-02-09 10:12:00,870 (trainer:762) INFO: 37epoch:train:7201-7300batch: iter_time=8.864e-05, forward_time=0.351, loss_ctc=50.130, loss_att=51.868, acc=0.761, loss=51.346, backward_time=0.334, grad_norm=46.101, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.656e-04, train_time=1.339 +[gpub011:0/16] 2024-02-09 10:13:53,045 
(trainer:762) INFO: 37epoch:train:7301-7400batch: iter_time=8.768e-05, forward_time=0.292, loss_ctc=38.975, loss_att=35.350, acc=0.777, loss=36.437, backward_time=0.298, grad_norm=34.482, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.656e-04, train_time=1.121 +[gpub011:0/16] 2024-02-09 10:16:26,499 (trainer:762) INFO: 37epoch:train:7401-7500batch: iter_time=8.341e-05, forward_time=0.289, loss_ctc=46.269, loss_att=44.168, acc=0.740, loss=44.798, backward_time=0.295, grad_norm=41.968, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.655e-04, train_time=1.535 +[gpub011:0/16] 2024-02-09 10:16:46,527 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub011:0/16] 2024-02-09 10:17:05,750 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 10:17:09,746 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 10:17:09,747 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-09 10:17:09,750 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 10:23:59,408 (trainer:762) INFO: 37epoch:train:7501-7600batch: iter_time=3.320, forward_time=0.359, loss_ctc=47.338, loss_att=45.757, acc=0.770, loss=46.231, backward_time=0.305, grad_norm=44.277, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.655e-04, 
train_time=4.529 +[gpub011:0/16] 2024-02-09 10:26:07,547 (trainer:762) INFO: 37epoch:train:7601-7700batch: iter_time=8.202e-05, forward_time=0.316, loss_ctc=46.940, loss_att=41.891, acc=0.768, loss=43.406, backward_time=0.322, grad_norm=42.226, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.655e-04, train_time=1.281 +[gpub011:0/16] 2024-02-09 10:28:25,867 (trainer:762) INFO: 37epoch:train:7701-7800batch: iter_time=8.467e-05, forward_time=0.293, loss_ctc=47.652, loss_att=50.675, acc=0.767, loss=49.768, backward_time=0.303, grad_norm=43.426, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.655e-04, train_time=1.382 +[gpub011:0/16] 2024-02-09 10:30:15,738 (trainer:762) INFO: 37epoch:train:7801-7900batch: iter_time=8.406e-05, forward_time=0.292, loss_ctc=49.272, loss_att=44.555, acc=0.774, loss=45.971, backward_time=0.300, grad_norm=44.957, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.655e-04, train_time=1.099 +[gpub011:0/16] 2024-02-09 10:32:49,873 (trainer:762) INFO: 37epoch:train:7901-8000batch: iter_time=9.106e-05, forward_time=0.370, loss_ctc=50.730, loss_att=53.228, acc=0.751, loss=52.479, backward_time=0.342, grad_norm=46.019, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.655e-04, train_time=1.541 +[gpub011:0/16] 2024-02-09 10:35:02,919 (trainer:762) INFO: 37epoch:train:8001-8100batch: iter_time=8.576e-05, forward_time=0.298, loss_ctc=46.896, loss_att=49.384, acc=0.766, loss=48.638, backward_time=0.303, grad_norm=42.706, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.655e-04, train_time=1.330 +[gpub011:0/16] 2024-02-09 10:37:05,042 (trainer:762) INFO: 37epoch:train:8101-8200batch: iter_time=8.301e-05, forward_time=0.292, loss_ctc=47.168, loss_att=47.688, acc=0.760, loss=47.532, backward_time=0.300, grad_norm=41.486, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.654e-04, train_time=1.220 +[gpub011:0/16] 2024-02-09 
10:38:57,286 (trainer:762) INFO: 37epoch:train:8201-8300batch: iter_time=8.713e-05, forward_time=0.290, loss_ctc=41.628, loss_att=35.120, acc=0.787, loss=37.072, backward_time=0.299, grad_norm=37.226, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.654e-04, train_time=1.123 +[gpub011:0/16] 2024-02-09 10:41:45,595 (trainer:762) INFO: 37epoch:train:8301-8400batch: iter_time=8.729e-05, forward_time=0.355, loss_ctc=45.129, loss_att=40.835, acc=0.763, loss=42.123, backward_time=0.320, grad_norm=42.392, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.654e-04, train_time=1.683 +[gpub011:0/16] 2024-02-09 10:43:37,792 (trainer:762) INFO: 37epoch:train:8401-8500batch: iter_time=9.214e-05, forward_time=0.294, loss_ctc=43.949, loss_att=48.810, acc=0.759, loss=47.351, backward_time=0.307, grad_norm=43.434, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.654e-04, train_time=1.122 +[gpub011:0/16] 2024-02-09 10:45:36,179 (trainer:762) INFO: 37epoch:train:8501-8600batch: iter_time=8.637e-05, forward_time=0.294, loss_ctc=47.491, loss_att=45.311, acc=0.773, loss=45.965, backward_time=0.300, grad_norm=39.266, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.654e-04, train_time=1.184 +[gpub011:0/16] 2024-02-09 10:48:07,119 (trainer:762) INFO: 37epoch:train:8601-8700batch: iter_time=8.251e-05, forward_time=0.320, loss_ctc=41.151, loss_att=38.066, acc=0.769, loss=38.991, backward_time=0.323, grad_norm=36.265, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.654e-04, train_time=1.509 +[gpub011:0/16] 2024-02-09 10:49:28,898 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub011:0/16] 2024-02-09 10:49:48,214 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 10:49:51,876 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 10:49:51,876 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-09 10:49:51,882 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 10:55:52,434 (trainer:762) INFO: 37epoch:train:8701-8800batch: iter_time=3.460, forward_time=0.324, loss_ctc=45.219, loss_att=41.480, acc=0.774, loss=42.602, backward_time=0.318, grad_norm=41.207, clip=100.000, loss_scale=8.100e+33, optim_step_time=0.094, optim0_lr0=1.654e-04, train_time=4.653 +[gpub011:0/16] 2024-02-09 10:58:24,972 (trainer:762) INFO: 37epoch:train:8801-8900batch: iter_time=7.951e-05, forward_time=0.289, loss_ctc=46.174, loss_att=40.187, acc=0.766, loss=41.983, backward_time=0.297, grad_norm=41.574, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.653e-04, train_time=1.525 +[gpub011:0/16] 2024-02-09 10:59:41,165 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 11:00:30,054 (trainer:762) INFO: 37epoch:train:8901-9000batch: iter_time=8.223e-05, forward_time=0.357, loss_ctc=47.877, loss_att=50.058, acc=0.768, loss=49.404, backward_time=0.339, grad_norm=43.363, clip=100.000, loss_scale=8.339e+33, optim_step_time=0.099, optim0_lr0=1.653e-04, train_time=1.251 +[gpub011:0/16] 2024-02-09 11:02:47,045 (trainer:762) INFO: 37epoch:train:9001-9100batch: iter_time=8.477e-05, forward_time=0.300, loss_ctc=48.263, loss_att=45.199, acc=0.778, loss=46.118, backward_time=0.300, grad_norm=45.989, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.653e-04, train_time=1.370 +[gpub011:0/16] 2024-02-09 11:05:00,840 (trainer:762) INFO: 37epoch:train:9101-9200batch: iter_time=8.616e-05, forward_time=0.294, loss_ctc=51.612, loss_att=50.351, acc=0.763, loss=50.729, backward_time=0.302, grad_norm=46.557, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.653e-04, train_time=1.338 +[gpub011:0/16] 2024-02-09 11:06:14,028 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 11:07:22,479 (trainer:762) INFO: 37epoch:train:9201-9300batch: iter_time=8.587e-05, forward_time=0.321, loss_ctc=49.871, loss_att=49.936, acc=0.760, loss=49.916, backward_time=0.327, grad_norm=40.031, clip=100.000, loss_scale=4.038e+33, optim_step_time=0.096, optim0_lr0=1.653e-04, train_time=1.416 +[gpub011:0/16] 2024-02-09 11:09:25,609 (trainer:762) INFO: 37epoch:train:9301-9400batch: iter_time=8.391e-05, forward_time=0.317, loss_ctc=44.956, loss_att=46.708, acc=0.750, loss=46.182, backward_time=0.330, grad_norm=43.777, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.653e-04, train_time=1.231 +[gpub011:0/16] 2024-02-09 11:11:36,088 (trainer:762) INFO: 37epoch:train:9401-9500batch: iter_time=8.621e-05, forward_time=0.292, loss_ctc=46.711, loss_att=43.918, acc=0.779, loss=44.756, backward_time=0.297, grad_norm=39.518, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.652e-04, train_time=1.305 +[gpub011:0/16] 2024-02-09 11:13:52,963 (trainer:762) INFO: 37epoch:train:9501-9600batch: iter_time=8.692e-05, forward_time=0.291, loss_ctc=47.337, loss_att=41.202, acc=0.772, loss=43.042, backward_time=0.299, grad_norm=40.703, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.652e-04, train_time=1.368 +[gpub011:0/16] 2024-02-09 11:16:07,485 (trainer:762) INFO: 37epoch:train:9601-9700batch: iter_time=8.833e-05, forward_time=0.396, loss_ctc=39.311, loss_att=38.439, acc=0.772, loss=38.700, backward_time=0.320, grad_norm=34.968, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.652e-04, train_time=1.345 +[gpub011:0/16] 2024-02-09 11:18:23,727 (trainer:762) INFO: 37epoch:train:9701-9800batch: iter_time=8.435e-05, forward_time=0.297, loss_ctc=49.538, loss_att=51.869, acc=0.766, loss=51.170, backward_time=0.305, grad_norm=45.534, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.652e-04, train_time=1.362 +[gpub011:0/16] 2024-02-09 11:20:41,809 
(trainer:762) INFO: 37epoch:train:9801-9900batch: iter_time=9.338e-05, forward_time=0.296, loss_ctc=38.752, loss_att=35.861, acc=0.784, loss=36.729, backward_time=0.298, grad_norm=33.569, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.652e-04, train_time=1.381 +[gpub011:0/16] 2024-02-09 11:22:44,219 (trainer:762) INFO: 37epoch:train:9901-10000batch: iter_time=8.559e-05, forward_time=0.330, loss_ctc=46.414, loss_att=43.762, acc=0.753, loss=44.558, backward_time=0.313, grad_norm=42.087, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.652e-04, train_time=1.224 +[gpub011:0/16] 2024-02-09 11:23:04,326 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub011:0/16] 2024-02-09 11:23:23,516 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 11:23:27,081 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 11:23:27,081 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-09 11:23:27,116 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 11:30:13,546 (trainer:762) INFO: 37epoch:train:10001-10100batch: iter_time=3.348, forward_time=0.328, loss_ctc=46.718, loss_att=41.608, acc=0.778, loss=43.141, backward_time=0.303, grad_norm=39.801, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.652e-04, 
train_time=4.493 +[gpub011:0/16] 2024-02-09 11:32:23,546 (trainer:762) INFO: 37epoch:train:10101-10200batch: iter_time=8.100e-05, forward_time=0.289, loss_ctc=46.518, loss_att=40.798, acc=0.771, loss=42.514, backward_time=0.296, grad_norm=41.527, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.651e-04, train_time=1.300 +[gpub011:0/16] 2024-02-09 11:34:50,286 (trainer:762) INFO: 37epoch:train:10201-10300batch: iter_time=8.133e-05, forward_time=0.315, loss_ctc=47.697, loss_att=50.068, acc=0.770, loss=49.357, backward_time=0.340, grad_norm=40.896, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.651e-04, train_time=1.467 +[gpub011:0/16] 2024-02-09 11:36:47,578 (trainer:762) INFO: 37epoch:train:10301-10400batch: iter_time=8.073e-05, forward_time=0.326, loss_ctc=49.654, loss_att=43.959, acc=0.775, loss=45.667, backward_time=0.311, grad_norm=46.348, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.651e-04, train_time=1.173 +[gpub011:0/16] 2024-02-09 11:39:02,822 (trainer:762) INFO: 37epoch:train:10401-10500batch: iter_time=9.687e-05, forward_time=0.294, loss_ctc=50.927, loss_att=53.210, acc=0.749, loss=52.525, backward_time=0.301, grad_norm=45.607, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.651e-04, train_time=1.352 +[gpub011:0/16] 2024-02-09 11:41:18,359 (trainer:762) INFO: 37epoch:train:10501-10600batch: iter_time=8.373e-05, forward_time=0.352, loss_ctc=46.995, loss_att=49.019, acc=0.768, loss=48.412, backward_time=0.314, grad_norm=40.163, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.651e-04, train_time=1.355 +[gpub011:0/16] 2024-02-09 11:43:31,892 (trainer:762) INFO: 37epoch:train:10601-10700batch: iter_time=8.311e-05, forward_time=0.337, loss_ctc=46.921, loss_att=48.090, acc=0.756, loss=47.739, backward_time=0.312, grad_norm=45.217, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.651e-04, train_time=1.335 
+[gpub011:0/16] 2024-02-09 11:45:43,898 (trainer:762) INFO: 37epoch:train:10701-10800batch: iter_time=8.522e-05, forward_time=0.294, loss_ctc=41.618, loss_att=34.833, acc=0.789, loss=36.868, backward_time=0.295, grad_norm=38.585, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.651e-04, train_time=1.320 +[gpub011:0/16] 2024-02-09 11:48:03,070 (trainer:762) INFO: 37epoch:train:10801-10900batch: iter_time=8.504e-05, forward_time=0.322, loss_ctc=44.604, loss_att=40.556, acc=0.765, loss=41.770, backward_time=0.307, grad_norm=42.442, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.650e-04, train_time=1.391 +[gpub011:0/16] 2024-02-09 11:50:22,532 (trainer:762) INFO: 37epoch:train:10901-11000batch: iter_time=8.459e-05, forward_time=0.343, loss_ctc=43.960, loss_att=48.820, acc=0.758, loss=47.362, backward_time=0.326, grad_norm=42.162, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.650e-04, train_time=1.394 +[gpub011:0/16] 2024-02-09 11:52:28,848 (trainer:762) INFO: 37epoch:train:11001-11100batch: iter_time=8.356e-05, forward_time=0.292, loss_ctc=47.378, loss_att=44.822, acc=0.774, loss=45.589, backward_time=0.301, grad_norm=39.997, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.650e-04, train_time=1.263 +[gpub011:0/16] 2024-02-09 11:54:39,882 (trainer:762) INFO: 37epoch:train:11101-11200batch: iter_time=8.328e-05, forward_time=0.322, loss_ctc=41.114, loss_att=37.902, acc=0.770, loss=38.866, backward_time=0.309, grad_norm=37.085, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.650e-04, train_time=1.310 +[gpub011:0/16] 2024-02-09 11:55:55,538 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-09 11:56:15,562 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 11:56:19,206 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 11:56:19,206 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-09 11:56:19,210 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 12:02:11,600 (trainer:762) INFO: 37epoch:train:11201-11300batch: iter_time=3.302, forward_time=0.341, loss_ctc=44.598, loss_att=43.782, acc=0.763, loss=44.027, backward_time=0.308, grad_norm=43.751, clip=100.000, loss_scale=3.738e+33, optim_step_time=0.096, optim0_lr0=1.650e-04, train_time=4.517 +[gpub011:0/16] 2024-02-09 12:04:17,074 (trainer:762) INFO: 37epoch:train:11301-11400batch: iter_time=8.608e-05, forward_time=0.290, loss_ctc=46.013, loss_att=41.559, acc=0.756, loss=42.895, backward_time=0.297, grad_norm=41.657, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.650e-04, train_time=1.254 +[gpub011:0/16] 2024-02-09 12:06:29,284 (trainer:762) INFO: 37epoch:train:11401-11500batch: iter_time=8.704e-05, forward_time=0.327, loss_ctc=47.041, loss_att=50.318, acc=0.758, loss=49.335, backward_time=0.313, grad_norm=41.410, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.649e-04, train_time=1.322 +[gpub011:0/16] 2024-02-09 12:08:45,155 (trainer:762) 
INFO: 37epoch:train:11501-11600batch: iter_time=9.602e-05, forward_time=0.340, loss_ctc=47.365, loss_att=44.565, acc=0.772, loss=45.405, backward_time=0.316, grad_norm=47.592, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.649e-04, train_time=1.358 +[gpub011:0/16] 2024-02-09 12:11:09,464 (trainer:762) INFO: 37epoch:train:11601-11700batch: iter_time=4.163e-04, forward_time=0.292, loss_ctc=51.449, loss_att=50.043, acc=0.756, loss=50.465, backward_time=0.304, grad_norm=45.892, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.649e-04, train_time=1.443 +[gpub011:0/16] 2024-02-09 12:13:07,605 (trainer:762) INFO: 37epoch:train:11701-11800batch: iter_time=8.567e-05, forward_time=0.359, loss_ctc=50.407, loss_att=51.300, acc=0.747, loss=51.032, backward_time=0.315, grad_norm=43.006, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.649e-04, train_time=1.181 +[gpub011:0/16] 2024-02-09 12:15:21,403 (trainer:762) INFO: 37epoch:train:11801-11900batch: iter_time=8.161e-05, forward_time=0.318, loss_ctc=44.680, loss_att=47.433, acc=0.734, loss=46.607, backward_time=0.319, grad_norm=45.782, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.649e-04, train_time=1.338 +[gpub011:0/16] 2024-02-09 12:17:35,814 (trainer:762) INFO: 37epoch:train:11901-12000batch: iter_time=8.517e-05, forward_time=0.291, loss_ctc=46.155, loss_att=43.636, acc=0.775, loss=44.392, backward_time=0.297, grad_norm=38.956, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.649e-04, train_time=1.344 +[gpub011:0/16] 2024-02-09 12:19:43,285 (trainer:762) INFO: 37epoch:train:12001-12100batch: iter_time=8.413e-05, forward_time=0.299, loss_ctc=46.801, loss_att=41.082, acc=0.762, loss=42.798, backward_time=0.299, grad_norm=38.759, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.649e-04, train_time=1.274 +[gpub011:0/16] 2024-02-09 12:22:01,286 (trainer:762) INFO: 
37epoch:train:12101-12200batch: iter_time=8.502e-05, forward_time=0.345, loss_ctc=39.326, loss_att=38.256, acc=0.771, loss=38.577, backward_time=0.343, grad_norm=35.491, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.648e-04, train_time=1.380 +[gpub011:0/16] 2024-02-09 12:24:13,993 (trainer:762) INFO: 37epoch:train:12201-12300batch: iter_time=8.631e-05, forward_time=0.302, loss_ctc=49.272, loss_att=51.514, acc=0.761, loss=50.841, backward_time=0.304, grad_norm=46.747, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.648e-04, train_time=1.327 +[gpub011:0/16] 2024-02-09 12:26:28,464 (trainer:762) INFO: 37epoch:train:12301-12400batch: iter_time=8.456e-05, forward_time=0.289, loss_ctc=38.668, loss_att=35.353, acc=0.777, loss=36.347, backward_time=0.296, grad_norm=32.972, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.648e-04, train_time=1.344 +[gpub011:0/16] 2024-02-09 12:28:33,766 (trainer:762) INFO: 37epoch:train:12401-12500batch: iter_time=8.695e-05, forward_time=0.320, loss_ctc=45.772, loss_att=44.193, acc=0.741, loss=44.667, backward_time=0.323, grad_norm=42.198, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.648e-04, train_time=1.253 +[gpub011:0/16] 2024-02-09 12:28:53,827 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-09 12:29:13,164 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 12:29:16,707 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 12:29:16,707 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-09 12:29:16,712 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 12:36:04,079 (trainer:762) INFO: 37epoch:train:12501-12600batch: iter_time=3.227, forward_time=0.339, loss_ctc=47.072, loss_att=43.999, acc=0.773, loss=44.921, backward_time=0.307, grad_norm=45.661, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.648e-04, train_time=4.503 +[gpub011:0/16] 2024-02-09 12:38:04,696 (trainer:762) INFO: 37epoch:train:12601-12700batch: iter_time=7.642e-05, forward_time=0.290, loss_ctc=46.539, loss_att=41.178, acc=0.770, loss=42.787, backward_time=0.298, grad_norm=41.554, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.648e-04, train_time=1.206 +[gpub011:0/16] 2024-02-09 12:40:36,187 (trainer:762) INFO: 37epoch:train:12701-12800batch: iter_time=8.273e-05, forward_time=0.351, loss_ctc=47.366, loss_att=50.469, acc=0.769, loss=49.538, backward_time=0.310, grad_norm=40.734, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.648e-04, train_time=1.515 +[gpub011:0/16] 2024-02-09 12:41:58,447 
(trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-09 12:42:46,010 (trainer:762) INFO: 37epoch:train:12801-12900batch: iter_time=7.394e-05, forward_time=0.347, loss_ctc=48.741, loss_att=44.404, acc=0.777, loss=45.705, backward_time=0.326, grad_norm=50.129, clip=100.000, loss_scale=4.222e+33, optim_step_time=0.095, optim0_lr0=1.647e-04, train_time=1.298 +[gpub011:0/16] 2024-02-09 12:44:52,402 (trainer:762) INFO: 37epoch:train:12901-13000batch: iter_time=7.680e-05, forward_time=0.299, loss_ctc=50.503, loss_att=52.548, acc=0.752, loss=51.934, backward_time=0.301, grad_norm=46.080, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.647e-04, train_time=1.264 +[gpub011:0/16] 2024-02-09 12:47:16,710 (trainer:762) INFO: 37epoch:train:13001-13100batch: iter_time=7.687e-05, forward_time=0.338, loss_ctc=46.607, loss_att=48.704, acc=0.770, loss=48.075, backward_time=0.320, grad_norm=42.842, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.647e-04, train_time=1.443 +[gpub011:0/16] 2024-02-09 12:49:14,008 (trainer:762) INFO: 37epoch:train:13101-13200batch: iter_time=1.670e-04, forward_time=0.326, loss_ctc=46.800, loss_att=47.314, acc=0.761, loss=47.160, backward_time=0.334, grad_norm=41.149, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.647e-04, train_time=1.173 +[gpub011:0/16] 2024-02-09 12:51:29,002 (trainer:762) INFO: 37epoch:train:13201-13300batch: iter_time=1.713e-04, forward_time=0.292, loss_ctc=41.094, loss_att=34.719, acc=0.789, loss=36.632, backward_time=0.303, grad_norm=36.692, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.647e-04, train_time=1.350 +[gpub011:0/16] 2024-02-09 12:53:59,617 (trainer:762) INFO: 37epoch:train:13301-13400batch: iter_time=7.933e-05, forward_time=0.290, loss_ctc=44.703, loss_att=40.409, acc=0.765, loss=41.697, backward_time=0.295, grad_norm=40.940, clip=100.000, loss_scale=2.596e+33, 
optim_step_time=0.093, optim0_lr0=1.647e-04, train_time=1.506 +[gpub011:0/16] 2024-02-09 12:56:06,510 (trainer:762) INFO: 37epoch:train:13401-13500batch: iter_time=7.765e-05, forward_time=0.370, loss_ctc=43.652, loss_att=48.493, acc=0.760, loss=47.040, backward_time=0.327, grad_norm=43.856, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.646e-04, train_time=1.269 +[gpub011:0/16] 2024-02-09 12:58:08,781 (trainer:762) INFO: 37epoch:train:13501-13600batch: iter_time=8.325e-05, forward_time=0.293, loss_ctc=47.479, loss_att=45.177, acc=0.773, loss=45.868, backward_time=0.299, grad_norm=40.179, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.646e-04, train_time=1.223 +[gpub011:0/16] 2024-02-09 13:00:31,237 (trainer:762) INFO: 37epoch:train:13601-13700batch: iter_time=3.599e-04, forward_time=0.291, loss_ctc=41.245, loss_att=38.123, acc=0.770, loss=39.059, backward_time=0.299, grad_norm=38.754, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.646e-04, train_time=1.424 +[gpub011:0/16] 2024-02-09 13:01:47,237 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-09 13:02:06,943 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 13:02:10,476 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 13:02:10,476 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-09 13:02:10,545 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 13:07:56,403 (trainer:762) INFO: 37epoch:train:13701-13800batch: iter_time=3.259, forward_time=0.352, loss_ctc=44.650, loss_att=42.731, acc=0.767, loss=43.307, backward_time=0.308, grad_norm=42.008, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.646e-04, train_time=4.452 +[gpub011:0/16] 2024-02-09 13:10:02,236 (trainer:762) INFO: 37epoch:train:13801-13900batch: iter_time=8.755e-05, forward_time=0.291, loss_ctc=46.374, loss_att=41.314, acc=0.756, loss=42.832, backward_time=0.299, grad_norm=41.479, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.646e-04, train_time=1.258 +[gpub011:0/16] 2024-02-09 13:12:07,740 (trainer:762) INFO: 37epoch:train:13901-14000batch: iter_time=8.050e-05, forward_time=0.292, loss_ctc=47.315, loss_att=50.112, acc=0.758, loss=49.273, backward_time=0.300, grad_norm=42.388, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.646e-04, train_time=1.254 +[gpub011:0/16] 2024-02-09 13:14:21,500 (trainer:762) 
INFO: 37epoch:train:14001-14100batch: iter_time=8.163e-05, forward_time=0.383, loss_ctc=48.013, loss_att=44.493, acc=0.771, loss=45.549, backward_time=0.322, grad_norm=47.361, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.646e-04, train_time=1.337 +[gpub011:0/16] 2024-02-09 13:16:42,945 (trainer:762) INFO: 37epoch:train:14101-14200batch: iter_time=9.263e-05, forward_time=0.294, loss_ctc=51.464, loss_att=50.032, acc=0.756, loss=50.462, backward_time=0.300, grad_norm=46.660, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.645e-04, train_time=1.415 +[gpub011:0/16] 2024-02-09 13:19:06,793 (trainer:762) INFO: 37epoch:train:14201-14300batch: iter_time=9.265e-05, forward_time=0.353, loss_ctc=49.890, loss_att=50.550, acc=0.749, loss=50.352, backward_time=0.362, grad_norm=43.032, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.645e-04, train_time=1.438 +[gpub011:0/16] 2024-02-09 13:21:20,406 (trainer:762) INFO: 37epoch:train:14301-14400batch: iter_time=8.632e-05, forward_time=0.291, loss_ctc=44.120, loss_att=46.828, acc=0.736, loss=46.016, backward_time=0.298, grad_norm=46.418, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.645e-04, train_time=1.336 +[gpub011:0/16] 2024-02-09 13:23:37,568 (trainer:762) INFO: 37epoch:train:14401-14500batch: iter_time=8.927e-05, forward_time=0.381, loss_ctc=45.511, loss_att=42.906, acc=0.777, loss=43.688, backward_time=0.344, grad_norm=38.505, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.645e-04, train_time=1.371 +[gpub011:0/16] 2024-02-09 13:25:45,563 (trainer:762) INFO: 37epoch:train:14501-14600batch: iter_time=8.423e-04, forward_time=0.294, loss_ctc=46.844, loss_att=41.249, acc=0.761, loss=42.927, backward_time=0.300, grad_norm=41.945, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.645e-04, train_time=1.280 +[gpub011:0/16] 2024-02-09 13:28:08,605 (trainer:762) INFO: 
37epoch:train:14601-14700batch: iter_time=9.315e-05, forward_time=0.326, loss_ctc=39.204, loss_att=38.154, acc=0.772, loss=38.469, backward_time=0.331, grad_norm=34.412, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.645e-04, train_time=1.431 +[gpub011:0/16] 2024-02-09 13:30:13,283 (trainer:762) INFO: 37epoch:train:14701-14800batch: iter_time=8.447e-05, forward_time=0.297, loss_ctc=48.716, loss_att=51.240, acc=0.762, loss=50.483, backward_time=0.305, grad_norm=46.512, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.645e-04, train_time=1.247 +[gpub011:0/16] 2024-02-09 13:32:18,316 (trainer:762) INFO: 37epoch:train:14801-14900batch: iter_time=9.178e-05, forward_time=0.288, loss_ctc=38.397, loss_att=35.201, acc=0.778, loss=36.160, backward_time=0.294, grad_norm=33.286, clip=100.000, loss_scale=3.557e+33, optim_step_time=0.093, optim0_lr0=1.644e-04, train_time=1.250 +[gpub011:0/16] 2024-02-09 13:34:43,636 (trainer:762) INFO: 37epoch:train:14901-15000batch: iter_time=9.125e-05, forward_time=0.388, loss_ctc=45.949, loss_att=44.229, acc=0.741, loss=44.745, backward_time=0.323, grad_norm=42.534, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.644e-04, train_time=1.453 +[gpub011:0/16] 2024-02-09 14:13:09,591 (trainer:361) INFO: 37epoch results: [train] iter_time=0.259, forward_time=0.317, loss_ctc=46.616, loss_att=44.903, acc=0.762, loss=45.417, backward_time=0.309, grad_norm=42.185, clip=100.000, loss_scale=4.712e+33, optim_step_time=0.095, optim0_lr0=1.655e-04, train_time=1.564, time=6 hours, 31 minutes and 30.46 seconds, total_count=585000, gpu_max_cached_mem_GB=43.805, [valid] loss_ctc=34.864, cer_ctc=0.179, loss_att=37.953, acc=0.690, cer=0.316, wer=0.989, loss=37.026, time=38 minutes and 1.9 seconds, total_count=182169, gpu_max_cached_mem_GB=43.805 +[gpub011:0/16] 2024-02-09 14:13:18,768 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub011:0/16] 2024-02-09 14:13:18,888 
(trainer:290) INFO: 38/45epoch started. Estimated time to finish: 2 days, 10 hours and 48 minutes +[gpub011:0/16] 2024-02-09 14:13:18,898 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-09 14:13:37,064 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 14:13:40,487 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 14:13:40,487 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-09 14:13:40,491 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 14:20:27,005 (trainer:762) INFO: 38epoch:train:1-100batch: iter_time=3.085, forward_time=0.335, loss_ctc=50.711, loss_att=52.060, acc=0.739, loss=51.656, backward_time=0.316, grad_norm=46.173, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.644e-04, train_time=4.280 +[gpub011:0/16] 2024-02-09 14:22:23,885 (trainer:762) INFO: 38epoch:train:101-200batch: iter_time=1.067e-04, forward_time=0.310, loss_ctc=39.529, loss_att=41.018, acc=0.767, loss=40.571, backward_time=0.299, grad_norm=37.999, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.644e-04, train_time=1.169 +[gpub011:0/16] 2024-02-09 14:24:52,377 (trainer:762) INFO: 38epoch:train:201-300batch: iter_time=9.012e-05, forward_time=0.335, loss_ctc=50.934, loss_att=55.565, acc=0.742, loss=54.175, 
backward_time=0.325, grad_norm=47.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.644e-04, train_time=1.484 +[gpub011:0/16] 2024-02-09 14:26:55,294 (trainer:762) INFO: 38epoch:train:301-400batch: iter_time=9.441e-05, forward_time=0.320, loss_ctc=54.399, loss_att=52.075, acc=0.752, loss=52.772, backward_time=0.317, grad_norm=105.383, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.644e-04, train_time=1.229 +[gpub011:0/16] 2024-02-09 14:29:06,668 (trainer:762) INFO: 38epoch:train:401-500batch: iter_time=8.240e-05, forward_time=0.314, loss_ctc=49.023, loss_att=49.679, acc=0.761, loss=49.482, backward_time=0.329, grad_norm=40.255, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.644e-04, train_time=1.314 +[gpub011:0/16] 2024-02-09 14:31:18,659 (trainer:762) INFO: 38epoch:train:501-600batch: iter_time=9.154e-05, forward_time=0.340, loss_ctc=42.695, loss_att=42.274, acc=0.769, loss=42.400, backward_time=0.300, grad_norm=41.897, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.643e-04, train_time=1.320 +[gpub011:0/16] 2024-02-09 14:33:29,516 (trainer:762) INFO: 38epoch:train:601-700batch: iter_time=8.640e-05, forward_time=0.297, loss_ctc=61.925, loss_att=53.367, acc=0.743, loss=55.934, backward_time=0.304, grad_norm=59.956, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.643e-04, train_time=1.308 +[gpub011:0/16] 2024-02-09 14:35:43,972 (trainer:762) INFO: 38epoch:train:701-800batch: iter_time=8.098e-05, forward_time=0.323, loss_ctc=42.501, loss_att=39.940, acc=0.763, loss=40.708, backward_time=0.323, grad_norm=38.844, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.643e-04, train_time=1.345 +[gpub011:0/16] 2024-02-09 14:37:43,382 (trainer:762) INFO: 38epoch:train:801-900batch: iter_time=5.000e-04, forward_time=0.352, loss_ctc=58.417, loss_att=48.338, acc=0.771, loss=51.362, backward_time=0.308, grad_norm=57.544, clip=100.000, 
loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.643e-04, train_time=1.194 +[gpub011:0/16] 2024-02-09 14:40:03,155 (trainer:762) INFO: 38epoch:train:901-1000batch: iter_time=1.222e-04, forward_time=0.333, loss_ctc=55.732, loss_att=49.492, acc=0.758, loss=51.364, backward_time=0.320, grad_norm=55.106, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.643e-04, train_time=1.397 +[gpub011:0/16] 2024-02-09 14:42:21,918 (trainer:762) INFO: 38epoch:train:1001-1100batch: iter_time=8.742e-05, forward_time=0.324, loss_ctc=45.720, loss_att=46.849, acc=0.750, loss=46.510, backward_time=0.312, grad_norm=43.120, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.643e-04, train_time=1.388 +[gpub011:0/16] 2024-02-09 14:44:31,352 (trainer:762) INFO: 38epoch:train:1101-1200batch: iter_time=1.726e-04, forward_time=0.314, loss_ctc=51.363, loss_att=48.380, acc=0.753, loss=49.275, backward_time=0.324, grad_norm=46.239, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.642e-04, train_time=1.294 +[gpub011:0/16] 2024-02-09 14:45:50,253 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-09 14:46:09,827 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 14:46:13,432 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 14:46:13,432 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-09 14:46:13,440 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 14:52:01,891 (trainer:762) INFO: 38epoch:train:1201-1300batch: iter_time=3.117, forward_time=0.314, loss_ctc=49.256, loss_att=47.942, acc=0.749, loss=48.336, backward_time=0.304, grad_norm=42.078, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.642e-04, train_time=4.505 +[gpub011:0/16] 2024-02-09 14:54:00,268 (trainer:762) INFO: 38epoch:train:1301-1400batch: iter_time=8.479e-05, forward_time=0.327, loss_ctc=44.285, loss_att=48.232, acc=0.739, loss=47.048, backward_time=0.296, grad_norm=41.472, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.642e-04, train_time=1.183 +[gpub011:0/16] 2024-02-09 14:56:07,197 (trainer:762) INFO: 38epoch:train:1401-1500batch: iter_time=1.620e-04, forward_time=0.310, loss_ctc=42.172, loss_att=46.034, acc=0.760, loss=44.875, backward_time=0.311, grad_norm=40.599, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.642e-04, train_time=1.269 +[gpub011:0/16] 2024-02-09 14:58:17,707 (trainer:762) INFO: 
38epoch:train:1501-1600batch: iter_time=9.265e-05, forward_time=0.305, loss_ctc=51.058, loss_att=46.044, acc=0.762, loss=47.549, backward_time=0.312, grad_norm=48.428, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.642e-04, train_time=1.305 +[gpub011:0/16] 2024-02-09 15:00:29,489 (trainer:762) INFO: 38epoch:train:1601-1700batch: iter_time=8.928e-05, forward_time=0.306, loss_ctc=50.236, loss_att=51.095, acc=0.740, loss=50.837, backward_time=0.297, grad_norm=45.788, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.642e-04, train_time=1.318 +[gpub011:0/16] 2024-02-09 15:02:26,242 (trainer:762) INFO: 38epoch:train:1701-1800batch: iter_time=9.377e-05, forward_time=0.326, loss_ctc=44.771, loss_att=44.994, acc=0.764, loss=44.927, backward_time=0.307, grad_norm=38.110, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.642e-04, train_time=1.167 +[gpub011:0/16] 2024-02-09 15:04:29,546 (trainer:762) INFO: 38epoch:train:1801-1900batch: iter_time=9.636e-05, forward_time=0.341, loss_ctc=46.195, loss_att=41.645, acc=0.760, loss=43.010, backward_time=0.317, grad_norm=44.370, clip=100.000, loss_scale=7.113e+33, optim_step_time=0.098, optim0_lr0=1.641e-04, train_time=1.233 +[gpub011:0/16] 2024-02-09 15:04:43,421 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 15:06:42,001 (trainer:762) INFO: 38epoch:train:1901-2000batch: iter_time=8.999e-05, forward_time=0.290, loss_ctc=54.430, loss_att=48.894, acc=0.748, loss=50.555, backward_time=0.294, grad_norm=56.261, clip=100.000, loss_scale=5.507e+33, optim_step_time=0.093, optim0_lr0=1.641e-04, train_time=1.325 +[gpub011:0/16] 2024-02-09 15:08:48,382 (trainer:762) INFO: 38epoch:train:2001-2100batch: iter_time=9.099e-05, forward_time=0.324, loss_ctc=52.708, loss_att=41.329, acc=0.769, loss=44.743, backward_time=0.301, grad_norm=46.918, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.641e-04, train_time=1.264 +[gpub011:0/16] 2024-02-09 15:10:52,687 (trainer:762) INFO: 38epoch:train:2101-2200batch: iter_time=9.298e-05, forward_time=0.328, loss_ctc=55.508, loss_att=52.514, acc=0.747, loss=53.412, backward_time=0.328, grad_norm=54.997, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.641e-04, train_time=1.242 +[gpub011:0/16] 2024-02-09 15:12:58,874 (trainer:762) INFO: 38epoch:train:2201-2300batch: iter_time=1.028e-04, forward_time=0.289, loss_ctc=43.394, loss_att=37.375, acc=0.766, loss=39.181, backward_time=0.293, grad_norm=39.296, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.641e-04, train_time=1.262 +[gpub011:0/16] 2024-02-09 15:15:04,592 (trainer:762) INFO: 38epoch:train:2301-2400batch: iter_time=8.276e-05, forward_time=0.325, loss_ctc=47.701, loss_att=47.883, acc=0.739, loss=47.828, backward_time=0.299, grad_norm=44.932, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.641e-04, train_time=1.257 +[gpub011:0/16] 2024-02-09 15:17:03,692 (trainer:762) INFO: 38epoch:train:2401-2500batch: iter_time=9.112e-05, forward_time=0.305, loss_ctc=48.949, loss_att=46.862, acc=0.761, loss=47.488, backward_time=0.301, grad_norm=43.782, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.641e-04, train_time=1.191 +[gpub011:0/16] 2024-02-09 15:17:23,721 
(multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub011:0/16] 2024-02-09 15:17:43,040 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 15:17:46,847 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 15:17:46,848 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-09 15:17:46,851 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 15:24:12,968 (trainer:762) INFO: 38epoch:train:2501-2600batch: iter_time=3.032, forward_time=0.312, loss_ctc=49.458, loss_att=48.455, acc=0.740, loss=48.756, backward_time=0.305, grad_norm=43.699, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.640e-04, train_time=4.292 +[gpub011:0/16] 2024-02-09 15:26:09,883 (trainer:762) INFO: 38epoch:train:2601-2700batch: iter_time=8.713e-05, forward_time=0.325, loss_ctc=38.746, loss_att=38.090, acc=0.766, loss=38.286, backward_time=0.297, grad_norm=36.472, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.640e-04, train_time=1.169 +[gpub011:0/16] 2024-02-09 15:28:33,570 (trainer:762) INFO: 38epoch:train:2701-2800batch: iter_time=9.269e-05, forward_time=0.296, loss_ctc=49.484, loss_att=53.498, acc=0.742, loss=52.294, backward_time=0.299, grad_norm=43.950, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.640e-04, 
train_time=1.437 +[gpub011:0/16] 2024-02-09 15:30:47,426 (trainer:762) INFO: 38epoch:train:2801-2900batch: iter_time=9.068e-05, forward_time=0.350, loss_ctc=52.890, loss_att=48.943, acc=0.753, loss=50.127, backward_time=0.316, grad_norm=48.281, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.640e-04, train_time=1.338 +[gpub011:0/16] 2024-02-09 15:32:50,896 (trainer:762) INFO: 38epoch:train:2901-3000batch: iter_time=8.799e-05, forward_time=0.304, loss_ctc=47.596, loss_att=47.651, acc=0.759, loss=47.635, backward_time=0.300, grad_norm=38.406, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.640e-04, train_time=1.235 +[gpub011:0/16] 2024-02-09 15:34:58,555 (trainer:762) INFO: 38epoch:train:3001-3100batch: iter_time=9.639e-05, forward_time=0.323, loss_ctc=42.706, loss_att=41.886, acc=0.767, loss=42.132, backward_time=0.315, grad_norm=40.963, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.640e-04, train_time=1.276 +[gpub011:0/16] 2024-02-09 15:37:05,492 (trainer:762) INFO: 38epoch:train:3101-3200batch: iter_time=9.005e-05, forward_time=0.331, loss_ctc=55.428, loss_att=49.166, acc=0.742, loss=51.045, backward_time=0.315, grad_norm=56.830, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.640e-04, train_time=1.268 +[gpub011:0/16] 2024-02-09 15:37:35,893 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 15:39:26,084 (trainer:762) INFO: 38epoch:train:3201-3300batch: iter_time=5.322e-04, forward_time=0.344, loss_ctc=41.899, loss_att=39.056, acc=0.763, loss=39.909, backward_time=0.298, grad_norm=38.370, clip=100.000, loss_scale=2.885e+33, optim_step_time=0.093, optim0_lr0=1.639e-04, train_time=1.407 +[gpub011:0/16] 2024-02-09 15:41:25,969 (trainer:762) INFO: 38epoch:train:3301-3400batch: iter_time=8.098e-05, forward_time=0.302, loss_ctc=53.344, loss_att=45.212, acc=0.770, loss=47.652, backward_time=0.302, grad_norm=54.793, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.639e-04, train_time=1.199 +[gpub011:0/16] 2024-02-09 15:43:41,244 (trainer:762) INFO: 38epoch:train:3401-3500batch: iter_time=7.968e-05, forward_time=0.351, loss_ctc=52.904, loss_att=46.650, acc=0.760, loss=48.526, backward_time=0.342, grad_norm=51.861, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.639e-04, train_time=1.353 +[gpub011:0/16] 2024-02-09 15:45:57,995 (trainer:762) INFO: 38epoch:train:3501-3600batch: iter_time=9.039e-05, forward_time=0.333, loss_ctc=44.854, loss_att=45.794, acc=0.740, loss=45.512, backward_time=0.295, grad_norm=43.716, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.639e-04, train_time=1.367 +[gpub011:0/16] 2024-02-09 15:47:54,375 (trainer:762) INFO: 38epoch:train:3601-3700batch: iter_time=9.177e-05, forward_time=0.298, loss_ctc=49.612, loss_att=47.022, acc=0.748, loss=47.799, backward_time=0.302, grad_norm=44.762, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.639e-04, train_time=1.164 +[gpub011:0/16] 2024-02-09 15:49:09,838 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub011:0/16] 2024-02-09 15:49:29,202 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 15:49:32,780 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 15:49:32,780 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-09 15:49:32,784 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 15:55:24,389 (trainer:762) INFO: 38epoch:train:3701-3800batch: iter_time=3.271, forward_time=0.335, loss_ctc=49.038, loss_att=46.913, acc=0.756, loss=47.551, backward_time=0.307, grad_norm=40.087, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.639e-04, train_time=4.500 +[gpub011:0/16] 2024-02-09 15:57:30,845 (trainer:762) INFO: 38epoch:train:3801-3900batch: iter_time=8.239e-05, forward_time=0.337, loss_ctc=43.727, loss_att=48.240, acc=0.752, loss=46.886, backward_time=0.300, grad_norm=41.496, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.639e-04, train_time=1.264 +[gpub011:0/16] 2024-02-09 15:59:44,123 (trainer:762) INFO: 38epoch:train:3901-4000batch: iter_time=8.366e-05, forward_time=0.292, loss_ctc=41.570, loss_att=47.429, acc=0.762, loss=45.671, backward_time=0.297, grad_norm=39.264, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.638e-04, train_time=1.333 +[gpub011:0/16] 2024-02-09 16:02:00,352 (trainer:762) INFO: 
38epoch:train:4001-4100batch: iter_time=9.395e-05, forward_time=0.342, loss_ctc=49.914, loss_att=48.827, acc=0.766, loss=49.153, backward_time=0.322, grad_norm=45.738, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.105, optim0_lr0=1.638e-04, train_time=1.362 +[gpub011:0/16] 2024-02-09 16:05:05,732 (trainer:762) INFO: 38epoch:train:4101-4200batch: iter_time=0.168, forward_time=0.364, loss_ctc=50.203, loss_att=51.515, acc=0.751, loss=51.121, backward_time=0.301, grad_norm=45.164, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.638e-04, train_time=1.853 +[gpub011:0/16] 2024-02-09 16:08:44,826 (trainer:762) INFO: 38epoch:train:4201-4300batch: iter_time=0.391, forward_time=0.290, loss_ctc=44.241, loss_att=44.823, acc=0.773, loss=44.648, backward_time=0.298, grad_norm=38.896, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.638e-04, train_time=2.191 +[gpub011:0/16] 2024-02-09 16:10:55,141 (trainer:762) INFO: 38epoch:train:4301-4400batch: iter_time=8.784e-05, forward_time=0.291, loss_ctc=46.079, loss_att=41.869, acc=0.767, loss=43.132, backward_time=0.301, grad_norm=44.128, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.638e-04, train_time=1.304 +[gpub011:0/16] 2024-02-09 16:14:32,788 (trainer:762) INFO: 38epoch:train:4401-4500batch: iter_time=0.066, forward_time=0.345, loss_ctc=51.686, loss_att=47.962, acc=0.758, loss=49.079, backward_time=0.335, grad_norm=51.167, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.638e-04, train_time=2.176 +[gpub011:0/16] 2024-02-09 16:16:43,874 (trainer:762) INFO: 38epoch:train:4501-4600batch: iter_time=9.928e-05, forward_time=0.322, loss_ctc=51.569, loss_att=42.327, acc=0.771, loss=45.100, backward_time=0.299, grad_norm=49.609, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.637e-04, train_time=1.310 +[gpub011:0/16] 2024-02-09 16:18:57,330 (trainer:762) INFO: 38epoch:train:4601-4700batch: iter_time=9.673e-05, 
forward_time=0.295, loss_ctc=54.865, loss_att=54.909, acc=0.753, loss=54.895, backward_time=0.301, grad_norm=55.574, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.637e-04, train_time=1.334 +[gpub011:0/16] 2024-02-09 16:22:44,813 (trainer:762) INFO: 38epoch:train:4701-4800batch: iter_time=9.873e-05, forward_time=0.408, loss_ctc=43.067, loss_att=39.059, acc=0.770, loss=40.262, backward_time=0.372, grad_norm=38.875, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.108, optim0_lr0=1.637e-04, train_time=2.275 +[gpub011:0/16] 2024-02-09 16:24:55,701 (trainer:762) INFO: 38epoch:train:4801-4900batch: iter_time=8.834e-05, forward_time=0.329, loss_ctc=47.670, loss_att=47.764, acc=0.753, loss=47.736, backward_time=0.305, grad_norm=45.765, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.637e-04, train_time=1.309 +[gpub011:0/16] 2024-02-09 16:26:58,983 (trainer:762) INFO: 38epoch:train:4901-5000batch: iter_time=8.183e-05, forward_time=0.294, loss_ctc=48.772, loss_att=47.362, acc=0.768, loss=47.785, backward_time=0.300, grad_norm=41.241, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.637e-04, train_time=1.232 +[gpub011:0/16] 2024-02-09 16:27:19,063 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub011:0/16] 2024-02-09 16:27:38,564 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 16:27:42,153 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 16:27:42,153 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-09 16:27:42,157 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 16:34:20,670 (trainer:762) INFO: 38epoch:train:5001-5100batch: iter_time=3.186, forward_time=0.352, loss_ctc=49.152, loss_att=50.267, acc=0.734, loss=49.933, backward_time=0.312, grad_norm=45.593, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.637e-04, train_time=4.417 +[gpub011:0/16] 2024-02-09 16:36:29,860 (trainer:762) INFO: 38epoch:train:5101-5200batch: iter_time=7.687e-05, forward_time=0.341, loss_ctc=38.292, loss_att=37.752, acc=0.770, loss=37.914, backward_time=0.297, grad_norm=35.883, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.637e-04, train_time=1.292 +[gpub011:0/16] 2024-02-09 16:38:42,134 (trainer:762) INFO: 38epoch:train:5201-5300batch: iter_time=8.207e-05, forward_time=0.292, loss_ctc=48.734, loss_att=52.724, acc=0.745, loss=51.527, backward_time=0.298, grad_norm=46.544, clip=100.000, loss_scale=4.881e+33, optim_step_time=0.093, optim0_lr0=1.636e-04, train_time=1.323 +[gpub011:0/16] 2024-02-09 16:40:46,920 (trainer:762) INFO: 
38epoch:train:5301-5400batch: iter_time=8.249e-05, forward_time=0.297, loss_ctc=52.421, loss_att=49.502, acc=0.751, loss=50.378, backward_time=0.303, grad_norm=46.811, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.636e-04, train_time=1.247 +[gpub011:0/16] 2024-02-09 16:43:01,153 (trainer:762) INFO: 38epoch:train:5401-5500batch: iter_time=8.476e-05, forward_time=0.332, loss_ctc=47.013, loss_att=48.307, acc=0.757, loss=47.919, backward_time=0.343, grad_norm=39.753, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.636e-04, train_time=1.342 +[gpub011:0/16] 2024-02-09 16:45:19,017 (trainer:762) INFO: 38epoch:train:5501-5600batch: iter_time=9.205e-05, forward_time=0.334, loss_ctc=42.241, loss_att=41.661, acc=0.771, loss=41.835, backward_time=0.299, grad_norm=40.473, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.636e-04, train_time=1.379 +[gpub011:0/16] 2024-02-09 16:47:15,234 (trainer:762) INFO: 38epoch:train:5601-5700batch: iter_time=8.709e-05, forward_time=0.290, loss_ctc=54.084, loss_att=49.098, acc=0.744, loss=50.594, backward_time=0.297, grad_norm=57.659, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.636e-04, train_time=1.162 +[gpub011:0/16] 2024-02-09 16:49:28,248 (trainer:762) INFO: 38epoch:train:5701-5800batch: iter_time=2.506e-04, forward_time=0.354, loss_ctc=41.779, loss_att=38.401, acc=0.766, loss=39.415, backward_time=0.325, grad_norm=36.934, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.636e-04, train_time=1.329 +[gpub011:0/16] 2024-02-09 16:51:58,188 (trainer:762) INFO: 38epoch:train:5801-5900batch: iter_time=8.959e-05, forward_time=0.314, loss_ctc=52.517, loss_att=44.544, acc=0.773, loss=46.936, backward_time=0.317, grad_norm=51.083, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.636e-04, train_time=1.500 +[gpub011:0/16] 2024-02-09 16:54:07,352 (trainer:762) INFO: 38epoch:train:5901-6000batch: 
iter_time=8.947e-05, forward_time=0.291, loss_ctc=52.221, loss_att=46.199, acc=0.762, loss=48.006, backward_time=0.298, grad_norm=55.090, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.635e-04, train_time=1.292 +[gpub011:0/16] 2024-02-09 16:56:18,009 (trainer:762) INFO: 38epoch:train:6001-6100batch: iter_time=0.001, forward_time=0.298, loss_ctc=44.604, loss_att=45.467, acc=0.744, loss=45.208, backward_time=0.296, grad_norm=43.462, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.635e-04, train_time=1.306 +[gpub011:0/16] 2024-02-09 16:59:00,690 (trainer:762) INFO: 38epoch:train:6101-6200batch: iter_time=8.862e-05, forward_time=0.418, loss_ctc=49.698, loss_att=47.372, acc=0.748, loss=48.070, backward_time=0.316, grad_norm=46.566, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.635e-04, train_time=1.626 +[gpub011:0/16] 2024-02-09 17:00:15,329 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub011:0/16] 2024-02-09 17:00:35,265 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 17:00:39,220 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 17:00:39,220 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-09 17:00:39,223 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 17:07:04,015 
(trainer:762) INFO: 38epoch:train:6201-6300batch: iter_time=3.685, forward_time=0.290, loss_ctc=48.670, loss_att=45.921, acc=0.753, loss=46.746, backward_time=0.298, grad_norm=42.595, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.635e-04, train_time=4.834 +[gpub011:0/16] 2024-02-09 17:09:15,198 (trainer:762) INFO: 38epoch:train:6301-6400batch: iter_time=8.381e-05, forward_time=0.290, loss_ctc=43.541, loss_att=45.804, acc=0.748, loss=45.125, backward_time=0.296, grad_norm=40.530, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.635e-04, train_time=1.312 +[gpub011:0/16] 2024-02-09 17:11:40,964 (trainer:762) INFO: 38epoch:train:6401-6500batch: iter_time=8.560e-05, forward_time=0.415, loss_ctc=41.491, loss_att=45.439, acc=0.764, loss=44.255, backward_time=0.323, grad_norm=39.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.635e-04, train_time=1.457 +[gpub011:0/16] 2024-02-09 17:13:45,312 (trainer:762) INFO: 38epoch:train:6501-6600batch: iter_time=8.239e-05, forward_time=0.292, loss_ctc=49.304, loss_att=44.639, acc=0.769, loss=46.039, backward_time=0.299, grad_norm=45.383, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.635e-04, train_time=1.243 +[gpub011:0/16] 2024-02-09 17:16:23,549 (trainer:762) INFO: 38epoch:train:6601-6700batch: iter_time=2.886e-04, forward_time=0.350, loss_ctc=49.795, loss_att=49.608, acc=0.744, loss=49.664, backward_time=0.384, grad_norm=43.854, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.634e-04, train_time=1.583 +[gpub011:0/16] 2024-02-09 17:18:16,245 (trainer:762) INFO: 38epoch:train:6701-6800batch: iter_time=8.510e-05, forward_time=0.290, loss_ctc=43.432, loss_att=44.354, acc=0.769, loss=44.077, backward_time=0.298, grad_norm=41.570, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.634e-04, train_time=1.126 +[gpub011:0/16] 2024-02-09 17:20:28,757 (trainer:762) INFO: 
38epoch:train:6801-6900batch: iter_time=8.384e-05, forward_time=0.289, loss_ctc=45.558, loss_att=40.399, acc=0.765, loss=41.946, backward_time=0.296, grad_norm=40.886, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.634e-04, train_time=1.325 +[gpub011:0/16] 2024-02-09 17:22:43,880 (trainer:762) INFO: 38epoch:train:6901-7000batch: iter_time=9.079e-04, forward_time=0.302, loss_ctc=51.293, loss_att=49.032, acc=0.753, loss=49.711, backward_time=0.296, grad_norm=55.155, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.634e-04, train_time=1.351 +[gpub011:0/16] 2024-02-09 17:24:58,989 (trainer:762) INFO: 38epoch:train:7001-7100batch: iter_time=8.427e-05, forward_time=0.361, loss_ctc=51.655, loss_att=41.537, acc=0.772, loss=44.573, backward_time=0.352, grad_norm=49.383, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.634e-04, train_time=1.351 +[gpub011:0/16] 2024-02-09 17:27:20,203 (trainer:762) INFO: 38epoch:train:7101-7200batch: iter_time=8.422e-05, forward_time=0.294, loss_ctc=53.391, loss_att=52.400, acc=0.749, loss=52.697, backward_time=0.300, grad_norm=55.303, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.634e-04, train_time=1.412 +[gpub011:0/16] 2024-02-09 17:29:30,969 (trainer:762) INFO: 38epoch:train:7201-7300batch: iter_time=2.710e-04, forward_time=0.291, loss_ctc=42.495, loss_att=36.671, acc=0.772, loss=38.418, backward_time=0.299, grad_norm=39.632, clip=100.000, loss_scale=9.762e+33, optim_step_time=0.093, optim0_lr0=1.634e-04, train_time=1.307 +[gpub011:0/16] 2024-02-09 17:31:52,762 (trainer:762) INFO: 38epoch:train:7301-7400batch: iter_time=1.447e-04, forward_time=0.425, loss_ctc=47.240, loss_att=47.370, acc=0.743, loss=47.331, backward_time=0.329, grad_norm=45.636, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.633e-04, train_time=1.417 +[gpub011:0/16] 2024-02-09 17:34:14,600 (trainer:762) INFO: 38epoch:train:7401-7500batch: 
iter_time=8.681e-05, forward_time=0.289, loss_ctc=48.514, loss_att=46.311, acc=0.764, loss=46.972, backward_time=0.296, grad_norm=41.543, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.633e-04, train_time=1.419 +[gpub011:0/16] 2024-02-09 17:34:34,670 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub011:0/16] 2024-02-09 17:34:54,083 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 17:34:57,709 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 17:34:57,709 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-09 17:34:57,712 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 17:42:43,116 (trainer:762) INFO: 38epoch:train:7501-7600batch: iter_time=3.697, forward_time=0.291, loss_ctc=47.991, loss_att=50.460, acc=0.745, loss=49.719, backward_time=0.299, grad_norm=44.699, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.633e-04, train_time=5.085 +[gpub011:0/16] 2024-02-09 17:45:14,571 (trainer:762) INFO: 38epoch:train:7601-7700batch: iter_time=8.212e-05, forward_time=0.353, loss_ctc=37.544, loss_att=39.122, acc=0.777, loss=38.649, backward_time=0.363, grad_norm=34.154, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.633e-04, train_time=1.514 +[gpub011:0/16] 2024-02-09 17:47:28,570 
(trainer:762) INFO: 38epoch:train:7701-7800batch: iter_time=8.421e-05, forward_time=0.293, loss_ctc=48.761, loss_att=54.865, acc=0.748, loss=53.034, backward_time=0.299, grad_norm=45.068, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.633e-04, train_time=1.339 +[gpub011:0/16] 2024-02-09 17:49:33,547 (trainer:762) INFO: 38epoch:train:7801-7900batch: iter_time=3.547e-04, forward_time=0.297, loss_ctc=51.724, loss_att=49.701, acc=0.760, loss=50.308, backward_time=0.301, grad_norm=48.780, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.633e-04, train_time=1.250 +[gpub011:0/16] 2024-02-09 17:51:35,652 (trainer:762) INFO: 38epoch:train:7901-8000batch: iter_time=8.234e-05, forward_time=0.305, loss_ctc=47.169, loss_att=49.069, acc=0.766, loss=48.499, backward_time=0.307, grad_norm=39.609, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.633e-04, train_time=1.221 +[gpub011:0/16] 2024-02-09 17:54:15,903 (trainer:762) INFO: 38epoch:train:8001-8100batch: iter_time=8.382e-05, forward_time=0.396, loss_ctc=42.255, loss_att=41.560, acc=0.777, loss=41.769, backward_time=0.312, grad_norm=41.249, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.632e-04, train_time=1.601 +[gpub011:0/16] 2024-02-09 17:56:17,803 (trainer:762) INFO: 38epoch:train:8101-8200batch: iter_time=3.785e-04, forward_time=0.318, loss_ctc=53.769, loss_att=50.261, acc=0.749, loss=51.314, backward_time=0.304, grad_norm=57.521, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.632e-04, train_time=1.220 +[gpub011:0/16] 2024-02-09 17:58:38,050 (trainer:762) INFO: 38epoch:train:8201-8300batch: iter_time=8.441e-05, forward_time=0.291, loss_ctc=41.179, loss_att=39.551, acc=0.769, loss=40.039, backward_time=0.297, grad_norm=36.505, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.632e-04, train_time=1.402 +[gpub011:0/16] 2024-02-09 18:01:19,017 (trainer:762) INFO: 
38epoch:train:8301-8400batch: iter_time=8.758e-05, forward_time=0.422, loss_ctc=52.701, loss_att=46.783, acc=0.775, loss=48.559, backward_time=0.329, grad_norm=49.383, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.632e-04, train_time=1.609 +[gpub011:0/16] 2024-02-09 18:03:19,242 (trainer:762) INFO: 38epoch:train:8401-8500batch: iter_time=8.269e-05, forward_time=0.295, loss_ctc=52.414, loss_att=48.402, acc=0.764, loss=49.605, backward_time=0.304, grad_norm=49.983, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.632e-04, train_time=1.202 +[gpub011:0/16] 2024-02-09 18:05:36,478 (trainer:762) INFO: 38epoch:train:8501-8600batch: iter_time=8.323e-05, forward_time=0.293, loss_ctc=43.715, loss_att=45.841, acc=0.757, loss=45.203, backward_time=0.298, grad_norm=41.802, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.632e-04, train_time=1.372 +[gpub011:0/16] 2024-02-09 18:08:23,627 (trainer:762) INFO: 38epoch:train:8601-8700batch: iter_time=8.486e-05, forward_time=0.405, loss_ctc=49.582, loss_att=47.493, acc=0.759, loss=48.120, backward_time=0.324, grad_norm=45.766, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.119, optim0_lr0=1.632e-04, train_time=1.671 +[gpub011:0/16] 2024-02-09 18:09:45,183 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub011:0/16] 2024-02-09 18:10:04,948 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 18:10:08,552 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 18:10:08,553 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-09 18:10:08,565 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 18:16:44,049 (trainer:762) INFO: 38epoch:train:8701-8800batch: iter_time=3.356, forward_time=0.291, loss_ctc=48.831, loss_att=47.377, acc=0.754, loss=47.813, backward_time=0.297, grad_norm=41.730, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.631e-04, train_time=5.003 +[gpub011:0/16] 2024-02-09 18:18:43,282 (trainer:762) INFO: 38epoch:train:8801-8900batch: iter_time=1.003e-04, forward_time=0.291, loss_ctc=43.382, loss_att=47.310, acc=0.746, loss=46.132, backward_time=0.301, grad_norm=39.924, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.631e-04, train_time=1.193 +[gpub011:0/16] 2024-02-09 18:21:00,806 (trainer:762) INFO: 38epoch:train:8901-9000batch: iter_time=1.003e-04, forward_time=0.370, loss_ctc=41.275, loss_att=45.814, acc=0.764, loss=44.452, backward_time=0.377, grad_norm=40.314, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.631e-04, train_time=1.375 +[gpub011:0/16] 2024-02-09 18:23:33,619 (trainer:762) 
INFO: 38epoch:train:9001-9100batch: iter_time=8.283e-05, forward_time=0.319, loss_ctc=49.146, loss_att=44.519, acc=0.769, loss=45.907, backward_time=0.298, grad_norm=45.392, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.631e-04, train_time=1.527 +[gpub011:0/16] 2024-02-09 18:25:37,377 (trainer:762) INFO: 38epoch:train:9101-9200batch: iter_time=8.247e-05, forward_time=0.295, loss_ctc=49.106, loss_att=49.483, acc=0.747, loss=49.370, backward_time=0.301, grad_norm=44.126, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.631e-04, train_time=1.237 +[gpub011:0/16] 2024-02-09 18:26:49,758 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-09 18:27:49,047 (trainer:762) INFO: 38epoch:train:9201-9300batch: iter_time=9.870e-05, forward_time=0.402, loss_ctc=43.690, loss_att=43.925, acc=0.772, loss=43.855, backward_time=0.322, grad_norm=38.287, clip=100.000, loss_scale=1.374e+34, optim_step_time=0.102, optim0_lr0=1.631e-04, train_time=1.317 +[gpub011:0/16] 2024-02-09 18:30:38,075 (trainer:762) INFO: 38epoch:train:9301-9400batch: iter_time=9.435e-05, forward_time=0.295, loss_ctc=46.015, loss_att=41.595, acc=0.761, loss=42.921, backward_time=0.294, grad_norm=43.855, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.631e-04, train_time=1.690 +[gpub011:0/16] 2024-02-09 18:32:42,573 (trainer:762) INFO: 38epoch:train:9401-9500batch: iter_time=9.541e-05, forward_time=0.293, loss_ctc=50.671, loss_att=47.182, acc=0.752, loss=48.229, backward_time=0.296, grad_norm=52.897, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.630e-04, train_time=1.245 +[gpub011:0/16] 2024-02-09 18:34:56,139 (trainer:762) INFO: 38epoch:train:9501-9600batch: iter_time=8.780e-05, forward_time=0.395, loss_ctc=50.039, loss_att=42.772, acc=0.771, loss=44.952, backward_time=0.320, grad_norm=52.564, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=1.630e-04, 
train_time=1.335 +[gpub011:0/16] 2024-02-09 18:37:37,033 (trainer:762) INFO: 38epoch:train:9601-9700batch: iter_time=9.266e-05, forward_time=0.295, loss_ctc=53.535, loss_att=52.049, acc=0.748, loss=52.495, backward_time=0.298, grad_norm=53.093, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.630e-04, train_time=1.608 +[gpub011:0/16] 2024-02-09 18:39:55,613 (trainer:762) INFO: 38epoch:train:9701-9800batch: iter_time=9.223e-05, forward_time=0.444, loss_ctc=42.156, loss_att=36.780, acc=0.772, loss=38.393, backward_time=0.320, grad_norm=38.014, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.630e-04, train_time=1.387 +[gpub011:0/16] 2024-02-09 18:41:49,819 (trainer:762) INFO: 38epoch:train:9801-9900batch: iter_time=8.538e-05, forward_time=0.292, loss_ctc=46.561, loss_att=46.498, acc=0.745, loss=46.517, backward_time=0.301, grad_norm=45.370, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.630e-04, train_time=1.141 +[gpub011:0/16] 2024-02-09 18:44:21,774 (trainer:762) INFO: 38epoch:train:9901-10000batch: iter_time=8.048e-05, forward_time=0.301, loss_ctc=47.998, loss_att=46.466, acc=0.765, loss=46.925, backward_time=0.300, grad_norm=40.199, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.630e-04, train_time=1.517 +[gpub011:0/16] 2024-02-09 18:44:41,834 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub011:0/16] 2024-02-09 18:45:01,037 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 18:45:04,632 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 18:45:04,632 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-09 18:45:04,635 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 18:52:40,548 (trainer:762) INFO: 38epoch:train:10001-10100batch: iter_time=3.616, forward_time=0.389, loss_ctc=48.348, loss_att=50.029, acc=0.746, loss=49.525, backward_time=0.348, grad_norm=42.721, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.629e-04, train_time=4.990 +[gpub011:0/16] 2024-02-09 18:55:00,661 (trainer:762) INFO: 38epoch:train:10101-10200batch: iter_time=8.480e-05, forward_time=0.288, loss_ctc=37.963, loss_att=39.284, acc=0.778, loss=38.888, backward_time=0.294, grad_norm=34.293, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.629e-04, train_time=1.401 +[gpub011:0/16] 2024-02-09 18:57:02,586 (trainer:762) INFO: 38epoch:train:10201-10300batch: iter_time=8.871e-05, forward_time=0.300, loss_ctc=48.516, loss_att=54.730, acc=0.749, loss=52.866, backward_time=0.308, grad_norm=43.805, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.629e-04, train_time=1.218 +[gpub011:0/16] 2024-02-09 18:59:18,778 
(trainer:762) INFO: 38epoch:train:10301-10400batch: iter_time=9.071e-05, forward_time=0.293, loss_ctc=51.807, loss_att=49.079, acc=0.764, loss=49.897, backward_time=0.299, grad_norm=46.071, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.629e-04, train_time=1.363 +[gpub011:0/16] 2024-02-09 19:01:56,187 (trainer:762) INFO: 38epoch:train:10401-10500batch: iter_time=8.723e-05, forward_time=0.356, loss_ctc=47.037, loss_att=47.940, acc=0.771, loss=47.669, backward_time=0.383, grad_norm=39.627, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.629e-04, train_time=1.574 +[gpub011:0/16] 2024-02-09 19:02:58,671 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-09 19:04:03,443 (trainer:762) INFO: 38epoch:train:10501-10600batch: iter_time=8.952e-05, forward_time=0.293, loss_ctc=41.817, loss_att=41.285, acc=0.777, loss=41.445, backward_time=0.297, grad_norm=40.890, clip=100.000, loss_scale=8.024e+33, optim_step_time=0.094, optim0_lr0=1.629e-04, train_time=1.272 +[gpub011:0/16] 2024-02-09 19:06:04,019 (trainer:762) INFO: 38epoch:train:10601-10700batch: iter_time=8.315e-05, forward_time=0.293, loss_ctc=53.434, loss_att=48.842, acc=0.753, loss=50.220, backward_time=0.299, grad_norm=55.672, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.629e-04, train_time=1.205 +[gpub011:0/16] 2024-02-09 19:09:03,918 (trainer:762) INFO: 38epoch:train:10701-10800batch: iter_time=9.565e-05, forward_time=0.423, loss_ctc=41.182, loss_att=39.723, acc=0.768, loss=40.161, backward_time=0.320, grad_norm=37.166, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.628e-04, train_time=1.800 +[gpub011:0/16] 2024-02-09 19:11:09,039 (trainer:762) INFO: 38epoch:train:10801-10900batch: iter_time=9.252e-05, forward_time=0.298, loss_ctc=51.683, loss_att=45.453, acc=0.781, loss=47.322, backward_time=0.305, grad_norm=49.354, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.097, optim0_lr0=1.628e-04, train_time=1.251 +[gpub011:0/16] 2024-02-09 19:13:58,869 (trainer:762) INFO: 38epoch:train:10901-11000batch: iter_time=8.669e-05, forward_time=0.292, loss_ctc=51.234, loss_att=47.251, acc=0.765, loss=48.446, backward_time=0.298, grad_norm=46.512, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.628e-04, train_time=1.697 +[gpub011:0/16] 2024-02-09 19:16:09,062 (trainer:762) INFO: 38epoch:train:11001-11100batch: iter_time=8.757e-05, forward_time=0.374, loss_ctc=44.072, loss_att=45.453, acc=0.757, loss=45.039, backward_time=0.381, grad_norm=42.648, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.628e-04, train_time=1.303 +[gpub011:0/16] 2024-02-09 19:18:39,109 (trainer:762) INFO: 38epoch:train:11101-11200batch: iter_time=8.224e-05, forward_time=0.294, loss_ctc=49.022, loss_att=47.191, acc=0.759, loss=47.741, backward_time=0.299, grad_norm=43.473, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.628e-04, train_time=1.500 +[gpub011:0/16] 2024-02-09 19:20:04,774 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-09 19:20:24,130 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 19:20:27,699 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 19:20:27,699 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-09 19:20:27,702 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 19:27:18,471 (trainer:762) INFO: 38epoch:train:11201-11300batch: iter_time=3.828, forward_time=0.399, loss_ctc=48.312, loss_att=45.491, acc=0.765, loss=46.337, backward_time=0.318, grad_norm=40.706, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.628e-04, train_time=5.193 +[gpub011:0/16] 2024-02-09 19:29:36,498 (trainer:762) INFO: 38epoch:train:11301-11400batch: iter_time=8.099e-05, forward_time=0.306, loss_ctc=43.400, loss_att=46.671, acc=0.758, loss=45.690, backward_time=0.298, grad_norm=39.681, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.628e-04, train_time=1.380 +[gpub011:0/16] 2024-02-09 19:31:55,648 (trainer:762) INFO: 38epoch:train:11401-11500batch: iter_time=8.210e-05, forward_time=0.292, loss_ctc=41.333, loss_att=46.696, acc=0.768, loss=45.087, backward_time=0.295, grad_norm=38.731, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.627e-04, train_time=1.391 +[gpub011:0/16] 2024-02-09 19:34:54,114 (trainer:762) 
INFO: 38epoch:train:11501-11600batch: iter_time=8.917e-05, forward_time=0.420, loss_ctc=49.302, loss_att=47.513, acc=0.769, loss=48.050, backward_time=0.324, grad_norm=42.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.627e-04, train_time=1.784 +[gpub011:0/16] 2024-02-09 19:36:50,133 (trainer:762) INFO: 38epoch:train:11601-11700batch: iter_time=4.398e-04, forward_time=0.310, loss_ctc=49.319, loss_att=49.534, acc=0.760, loss=49.470, backward_time=0.301, grad_norm=41.979, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.627e-04, train_time=1.160 +[gpub011:0/16] 2024-02-09 19:39:42,988 (trainer:762) INFO: 38epoch:train:11701-11800batch: iter_time=8.287e-05, forward_time=0.292, loss_ctc=43.350, loss_att=44.309, acc=0.776, loss=44.021, backward_time=0.296, grad_norm=37.521, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.627e-04, train_time=1.728 +[gpub011:0/16] 2024-02-09 19:42:00,869 (trainer:762) INFO: 38epoch:train:11801-11900batch: iter_time=8.406e-05, forward_time=0.354, loss_ctc=45.410, loss_att=41.069, acc=0.772, loss=42.371, backward_time=0.362, grad_norm=43.690, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.627e-04, train_time=1.379 +[gpub011:0/16] 2024-02-09 19:44:25,029 (trainer:762) INFO: 38epoch:train:11901-12000batch: iter_time=3.980e-04, forward_time=0.291, loss_ctc=49.426, loss_att=47.852, acc=0.759, loss=48.324, backward_time=0.306, grad_norm=53.348, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.627e-04, train_time=1.441 +[gpub011:0/16] 2024-02-09 19:46:44,478 (trainer:762) INFO: 38epoch:train:12001-12100batch: iter_time=8.192e-05, forward_time=0.298, loss_ctc=50.051, loss_att=41.355, acc=0.776, loss=43.964, backward_time=0.297, grad_norm=48.253, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.627e-04, train_time=1.394 +[gpub011:0/16] 2024-02-09 19:49:01,644 (trainer:762) INFO: 
38epoch:train:12101-12200batch: iter_time=8.284e-05, forward_time=0.394, loss_ctc=52.752, loss_att=53.458, acc=0.759, loss=53.246, backward_time=0.362, grad_norm=54.596, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.626e-04, train_time=1.371 +[gpub011:0/16] 2024-02-09 19:51:24,034 (trainer:762) INFO: 38epoch:train:12201-12300batch: iter_time=8.699e-05, forward_time=0.296, loss_ctc=42.092, loss_att=38.462, acc=0.774, loss=39.551, backward_time=0.294, grad_norm=205.017, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.626e-04, train_time=1.424 +[gpub011:0/16] 2024-02-09 19:53:21,648 (trainer:762) INFO: 38epoch:train:12301-12400batch: iter_time=8.272e-05, forward_time=0.295, loss_ctc=46.374, loss_att=46.627, acc=0.756, loss=46.551, backward_time=0.301, grad_norm=43.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.626e-04, train_time=1.176 +[gpub011:0/16] 2024-02-09 19:55:35,814 (trainer:762) INFO: 38epoch:train:12401-12500batch: iter_time=8.124e-05, forward_time=0.396, loss_ctc=48.458, loss_att=47.029, acc=0.771, loss=47.458, backward_time=0.328, grad_norm=41.899, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.626e-04, train_time=1.342 +[gpub011:0/16] 2024-02-09 19:55:55,841 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-09 19:56:15,691 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 19:56:19,529 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 19:56:19,529 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-09 19:56:19,533 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 20:02:49,736 (trainer:762) INFO: 38epoch:train:12501-12600batch: iter_time=3.063, forward_time=0.293, loss_ctc=48.159, loss_att=50.439, acc=0.737, loss=49.755, backward_time=0.298, grad_norm=45.635, clip=100.000, loss_scale=7.529e+33, optim_step_time=0.093, optim0_lr0=1.626e-04, train_time=4.339 +[gpub011:0/16] 2024-02-09 20:04:04,929 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 20:05:04,806 (trainer:762) INFO: 38epoch:train:12601-12700batch: iter_time=2.654e-04, forward_time=0.374, loss_ctc=37.933, loss_att=38.278, acc=0.769, loss=38.174, backward_time=0.350, grad_norm=35.435, clip=100.000, loss_scale=7.500e+33, optim_step_time=0.097, optim0_lr0=1.626e-04, train_time=1.351 +[gpub011:0/16] 2024-02-09 20:07:20,774 (trainer:762) INFO: 38epoch:train:12701-12800batch: iter_time=8.393e-05, forward_time=0.293, loss_ctc=47.793, loss_att=53.161, acc=0.744, loss=51.551, backward_time=0.297, grad_norm=44.928, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.626e-04, train_time=1.359 +[gpub011:0/16] 2024-02-09 20:09:36,130 (trainer:762) INFO: 38epoch:train:12801-12900batch: iter_time=2.308e-04, forward_time=0.392, loss_ctc=51.231, loss_att=49.296, acc=0.755, loss=49.876, backward_time=0.348, grad_norm=47.658, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.625e-04, train_time=1.353 +[gpub011:0/16] 2024-02-09 20:11:31,531 (trainer:762) INFO: 38epoch:train:12901-13000batch: iter_time=8.257e-05, forward_time=0.300, loss_ctc=46.945, loss_att=47.785, acc=0.760, loss=47.533, backward_time=0.300, grad_norm=39.259, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.625e-04, train_time=1.154 +[gpub011:0/16] 2024-02-09 20:14:16,593 (trainer:762) INFO: 38epoch:train:13001-13100batch: iter_time=2.716e-04, forward_time=0.355, loss_ctc=41.819, loss_att=41.512, acc=0.772, loss=41.604, backward_time=0.353, grad_norm=41.151, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.625e-04, train_time=1.650 +[gpub011:0/16] 2024-02-09 20:16:16,152 (trainer:762) INFO: 38epoch:train:13101-13200batch: iter_time=8.573e-05, forward_time=0.297, loss_ctc=53.535, loss_att=48.613, acc=0.746, loss=50.089, backward_time=0.300, grad_norm=57.200, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.625e-04, train_time=1.196 +[gpub011:0/16] 2024-02-09 
20:18:26,154 (trainer:762) INFO: 38epoch:train:13201-13300batch: iter_time=8.567e-05, forward_time=0.385, loss_ctc=40.897, loss_att=38.422, acc=0.767, loss=39.164, backward_time=0.340, grad_norm=36.303, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.625e-04, train_time=1.300 +[gpub011:0/16] 2024-02-09 20:20:57,099 (trainer:762) INFO: 38epoch:train:13301-13400batch: iter_time=4.039e-04, forward_time=0.292, loss_ctc=51.647, loss_att=45.007, acc=0.774, loss=46.999, backward_time=0.301, grad_norm=51.543, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.625e-04, train_time=1.508 +[gpub011:0/16] 2024-02-09 20:23:12,231 (trainer:762) INFO: 38epoch:train:13401-13500batch: iter_time=8.220e-05, forward_time=0.402, loss_ctc=52.129, loss_att=46.053, acc=0.764, loss=47.876, backward_time=0.337, grad_norm=52.001, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.625e-04, train_time=1.351 +[gpub011:0/16] 2024-02-09 20:25:27,890 (trainer:762) INFO: 38epoch:train:13501-13600batch: iter_time=2.495e-04, forward_time=0.299, loss_ctc=43.480, loss_att=45.537, acc=0.745, loss=44.920, backward_time=0.298, grad_norm=41.915, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.624e-04, train_time=1.357 +[gpub011:0/16] 2024-02-09 20:27:46,822 (trainer:762) INFO: 38epoch:train:13601-13700batch: iter_time=8.175e-05, forward_time=0.386, loss_ctc=49.022, loss_att=46.921, acc=0.750, loss=47.551, backward_time=0.335, grad_norm=45.396, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.624e-04, train_time=1.388 +[gpub011:0/16] 2024-02-09 20:29:07,703 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-09 20:29:27,683 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 20:29:31,240 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 20:29:31,240 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-09 20:29:31,244 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 20:35:25,344 (trainer:762) INFO: 38epoch:train:13701-13800batch: iter_time=3.332, forward_time=0.295, loss_ctc=48.113, loss_att=46.007, acc=0.762, loss=46.639, backward_time=0.304, grad_norm=41.705, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.624e-04, train_time=4.586 +[gpub011:0/16] 2024-02-09 20:37:30,450 (trainer:762) INFO: 38epoch:train:13801-13900batch: iter_time=7.986e-05, forward_time=0.338, loss_ctc=43.065, loss_att=47.601, acc=0.756, loss=46.240, backward_time=0.334, grad_norm=39.587, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.624e-04, train_time=1.251 +[gpub011:0/16] 2024-02-09 20:39:52,485 (trainer:762) INFO: 38epoch:train:13901-14000batch: iter_time=8.009e-05, forward_time=0.327, loss_ctc=40.629, loss_att=46.625, acc=0.770, loss=44.826, backward_time=0.297, grad_norm=40.167, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.624e-04, train_time=1.420 +[gpub011:0/16] 2024-02-09 20:42:34,450 (trainer:762) 
INFO: 38epoch:train:14001-14100batch: iter_time=8.254e-05, forward_time=0.303, loss_ctc=49.952, loss_att=48.537, acc=0.770, loss=48.962, backward_time=0.297, grad_norm=44.634, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.624e-04, train_time=1.619 +[gpub011:0/16] 2024-02-09 20:44:36,360 (trainer:762) INFO: 38epoch:train:14101-14200batch: iter_time=8.034e-05, forward_time=0.295, loss_ctc=49.437, loss_att=49.509, acc=0.760, loss=49.488, backward_time=0.307, grad_norm=44.012, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.624e-04, train_time=1.219 +[gpub011:0/16] 2024-02-09 20:47:08,505 (trainer:762) INFO: 38epoch:train:14201-14300batch: iter_time=8.197e-05, forward_time=0.355, loss_ctc=43.702, loss_att=44.262, acc=0.778, loss=44.094, backward_time=0.345, grad_norm=39.270, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.623e-04, train_time=1.521 +[gpub011:0/16] 2024-02-09 20:49:28,643 (trainer:762) INFO: 38epoch:train:14301-14400batch: iter_time=1.511e-04, forward_time=0.305, loss_ctc=45.068, loss_att=40.924, acc=0.772, loss=42.167, backward_time=0.300, grad_norm=41.093, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.623e-04, train_time=1.401 +[gpub011:0/16] 2024-02-09 20:51:27,275 (trainer:762) INFO: 38epoch:train:14401-14500batch: iter_time=8.626e-05, forward_time=0.291, loss_ctc=50.158, loss_att=49.154, acc=0.759, loss=49.456, backward_time=0.296, grad_norm=62.173, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.623e-04, train_time=1.186 +[gpub011:0/16] 2024-02-09 20:54:04,825 (trainer:762) INFO: 38epoch:train:14501-14600batch: iter_time=8.210e-05, forward_time=0.368, loss_ctc=50.177, loss_att=41.293, acc=0.775, loss=43.958, backward_time=0.328, grad_norm=50.405, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.623e-04, train_time=1.575 +[gpub011:0/16] 2024-02-09 20:56:04,683 (trainer:762) INFO: 
38epoch:train:14601-14700batch: iter_time=8.791e-05, forward_time=0.298, loss_ctc=52.614, loss_att=53.764, acc=0.760, loss=53.419, backward_time=0.311, grad_norm=53.153, clip=100.000, loss_scale=8.048e+33, optim_step_time=0.094, optim0_lr0=1.623e-04, train_time=1.198 +[gpub011:0/16] 2024-02-09 20:58:23,264 (trainer:762) INFO: 38epoch:train:14701-14800batch: iter_time=8.338e-05, forward_time=0.288, loss_ctc=42.115, loss_att=38.256, acc=0.775, loss=39.414, backward_time=0.294, grad_norm=39.707, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.623e-04, train_time=1.386 +[gpub011:0/16] 2024-02-09 21:00:50,740 (trainer:762) INFO: 38epoch:train:14801-14900batch: iter_time=8.478e-05, forward_time=0.375, loss_ctc=46.803, loss_att=47.165, acc=0.756, loss=47.056, backward_time=0.351, grad_norm=46.172, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.623e-04, train_time=1.474 +[gpub011:0/16] 2024-02-09 21:03:07,040 (trainer:762) INFO: 38epoch:train:14901-15000batch: iter_time=8.049e-05, forward_time=0.301, loss_ctc=48.325, loss_att=47.156, acc=0.771, loss=47.507, backward_time=0.298, grad_norm=41.274, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.622e-04, train_time=1.363 +[gpub011:0/16] 2024-02-09 21:41:40,488 (trainer:361) INFO: 38epoch results: [train] iter_time=0.273, forward_time=0.326, loss_ctc=47.705, loss_att=46.264, acc=0.760, loss=46.696, backward_time=0.312, grad_norm=46.262, clip=100.000, loss_scale=6.194e+33, optim_step_time=0.096, optim0_lr0=1.633e-04, train_time=1.639, time=6 hours, 50 minutes and 12.04 seconds, total_count=600000, gpu_max_cached_mem_GB=43.805, [valid] loss_ctc=35.145, cer_ctc=0.179, loss_att=37.130, acc=0.683, cer=0.345, wer=0.998, loss=36.534, time=38 minutes and 9.34 seconds, total_count=186840, gpu_max_cached_mem_GB=43.805 +[gpub011:0/16] 2024-02-09 21:41:50,207 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub011:0/16] 2024-02-09 
21:41:50,362 (trainer:290) INFO: 39/45epoch started. Estimated time to finish: 2 days, 3 hours and 37 minutes +[gpub011:0/16] 2024-02-09 21:41:50,371 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-09 21:42:09,062 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 21:42:12,545 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 21:42:12,545 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-09 21:42:12,548 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 21:48:31,038 (trainer:762) INFO: 39epoch:train:1-100batch: iter_time=2.861, forward_time=0.339, loss_ctc=51.230, loss_att=46.265, acc=0.760, loss=47.754, backward_time=0.303, grad_norm=47.260, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.622e-04, train_time=4.006 +[gpub011:0/16] 2024-02-09 21:50:24,504 (trainer:762) INFO: 39epoch:train:101-200batch: iter_time=8.253e-05, forward_time=0.293, loss_ctc=45.606, loss_att=38.903, acc=0.784, loss=40.914, backward_time=0.306, grad_norm=42.005, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.622e-04, train_time=1.135 +[gpub011:0/16] 2024-02-09 21:52:47,288 (trainer:762) INFO: 39epoch:train:201-300batch: iter_time=8.473e-05, forward_time=0.294, loss_ctc=54.257, loss_att=54.412, acc=0.755, 
loss=54.365, backward_time=0.301, grad_norm=56.390, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.622e-04, train_time=1.428 +[gpub011:0/16] 2024-02-09 21:55:01,593 (trainer:762) INFO: 39epoch:train:301-400batch: iter_time=9.345e-05, forward_time=0.377, loss_ctc=55.951, loss_att=48.755, acc=0.761, loss=50.913, backward_time=0.324, grad_norm=50.768, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.622e-04, train_time=1.343 +[gpub011:0/16] 2024-02-09 21:57:03,237 (trainer:762) INFO: 39epoch:train:401-500batch: iter_time=9.004e-05, forward_time=0.290, loss_ctc=43.364, loss_att=39.515, acc=0.770, loss=40.670, backward_time=0.300, grad_norm=40.189, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.622e-04, train_time=1.216 +[gpub011:0/16] 2024-02-09 21:59:07,906 (trainer:762) INFO: 39epoch:train:501-600batch: iter_time=8.967e-05, forward_time=0.293, loss_ctc=50.515, loss_att=52.554, acc=0.734, loss=51.942, backward_time=0.299, grad_norm=46.221, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.622e-04, train_time=1.247 +[gpub011:0/16] 2024-02-09 22:01:15,482 (trainer:762) INFO: 39epoch:train:601-700batch: iter_time=8.648e-05, forward_time=0.303, loss_ctc=40.461, loss_att=43.185, acc=0.776, loss=42.367, backward_time=0.306, grad_norm=37.563, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.621e-04, train_time=1.276 +[gpub011:0/16] 2024-02-09 22:03:39,887 (trainer:762) INFO: 39epoch:train:701-800batch: iter_time=1.039e-04, forward_time=0.337, loss_ctc=47.492, loss_att=46.150, acc=0.765, loss=46.552, backward_time=0.337, grad_norm=41.427, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.621e-04, train_time=1.443 +[gpub011:0/16] 2024-02-09 22:05:35,444 (trainer:762) INFO: 39epoch:train:801-900batch: iter_time=9.205e-05, forward_time=0.290, loss_ctc=48.235, loss_att=37.427, acc=0.763, loss=40.670, backward_time=0.294, grad_norm=45.324, 
clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.621e-04, train_time=1.156 +[gpub011:0/16] 2024-02-09 22:07:46,201 (trainer:762) INFO: 39epoch:train:901-1000batch: iter_time=8.830e-05, forward_time=0.294, loss_ctc=49.381, loss_att=47.409, acc=0.764, loss=48.001, backward_time=0.299, grad_norm=56.574, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.621e-04, train_time=1.307 +[gpub011:0/16] 2024-02-09 22:09:52,452 (trainer:762) INFO: 39epoch:train:1001-1100batch: iter_time=1.983e-04, forward_time=0.306, loss_ctc=38.679, loss_att=35.676, acc=0.771, loss=36.577, backward_time=0.314, grad_norm=37.649, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.621e-04, train_time=1.262 +[gpub011:0/16] 2024-02-09 22:12:10,422 (trainer:762) INFO: 39epoch:train:1101-1200batch: iter_time=9.115e-05, forward_time=0.334, loss_ctc=41.549, loss_att=44.292, acc=0.759, loss=43.469, backward_time=0.332, grad_norm=38.930, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.621e-04, train_time=1.379 +[gpub011:0/16] 2024-02-09 22:13:31,904 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-09 22:13:51,222 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 22:13:55,139 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 22:13:55,139 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-09 22:13:55,143 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 22:19:47,374 (trainer:762) INFO: 39epoch:train:1201-1300batch: iter_time=3.297, forward_time=0.296, loss_ctc=44.125, loss_att=38.059, acc=0.789, loss=39.879, backward_time=0.296, grad_norm=39.143, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.621e-04, train_time=4.570 +[gpub011:0/16] 2024-02-09 22:21:45,163 (trainer:762) INFO: 39epoch:train:1301-1400batch: iter_time=7.811e-05, forward_time=0.312, loss_ctc=44.882, loss_att=44.634, acc=0.770, loss=44.709, backward_time=0.320, grad_norm=45.891, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.620e-04, train_time=1.178 +[gpub011:0/16] 2024-02-09 22:22:04,735 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-09 22:24:16,465 (trainer:762) INFO: 39epoch:train:1401-1500batch: iter_time=8.742e-05, forward_time=0.364, loss_ctc=47.104, loss_att=42.119, acc=0.783, loss=43.614, backward_time=0.314, grad_norm=39.689, clip=100.000, loss_scale=5.559e+33, optim_step_time=0.094, optim0_lr0=1.620e-04, train_time=1.512 +[gpub011:0/16] 2024-02-09 22:26:20,296 (trainer:762) INFO: 39epoch:train:1501-1600batch: iter_time=8.833e-05, forward_time=0.301, loss_ctc=57.515, loss_att=53.569, acc=0.758, loss=54.753, backward_time=0.309, grad_norm=54.383, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.620e-04, train_time=1.238 +[gpub011:0/16] 2024-02-09 22:28:28,657 (trainer:762) INFO: 39epoch:train:1601-1700batch: iter_time=8.749e-05, forward_time=0.312, loss_ctc=50.521, loss_att=47.855, acc=0.763, loss=48.655, backward_time=0.305, grad_norm=48.810, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.620e-04, train_time=1.284 +[gpub011:0/16] 2024-02-09 22:30:40,278 (trainer:762) INFO: 39epoch:train:1701-1800batch: iter_time=9.134e-05, forward_time=0.361, loss_ctc=42.752, loss_att=42.523, acc=0.754, loss=42.592, backward_time=0.312, grad_norm=39.309, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.620e-04, train_time=1.316 +[gpub011:0/16] 2024-02-09 22:32:53,560 (trainer:762) INFO: 39epoch:train:1801-1900batch: iter_time=9.193e-05, forward_time=0.298, loss_ctc=46.024, loss_att=49.665, acc=0.755, loss=48.573, backward_time=0.303, grad_norm=42.499, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.620e-04, train_time=1.333 +[gpub011:0/16] 2024-02-09 22:35:01,354 (trainer:762) INFO: 39epoch:train:1901-2000batch: iter_time=9.245e-05, forward_time=0.308, loss_ctc=46.601, loss_att=47.509, acc=0.775, loss=47.237, backward_time=0.311, grad_norm=41.383, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.620e-04, train_time=1.278 +[gpub011:0/16] 2024-02-09 22:37:02,285 
(trainer:762) INFO: 39epoch:train:2001-2100batch: iter_time=8.661e-05, forward_time=0.346, loss_ctc=41.219, loss_att=36.170, acc=0.765, loss=37.684, backward_time=0.309, grad_norm=40.903, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.620e-04, train_time=1.208 +[gpub011:0/16] 2024-02-09 22:39:22,005 (trainer:762) INFO: 39epoch:train:2101-2200batch: iter_time=8.518e-05, forward_time=0.294, loss_ctc=51.760, loss_att=46.120, acc=0.762, loss=47.812, backward_time=0.304, grad_norm=59.843, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.619e-04, train_time=1.398 +[gpub011:0/16] 2024-02-09 22:41:35,834 (trainer:762) INFO: 39epoch:train:2201-2300batch: iter_time=9.326e-05, forward_time=0.338, loss_ctc=39.030, loss_att=34.986, acc=0.782, loss=36.199, backward_time=0.333, grad_norm=36.560, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.619e-04, train_time=1.338 +[gpub011:0/16] 2024-02-09 22:43:29,424 (trainer:762) INFO: 39epoch:train:2301-2400batch: iter_time=9.229e-05, forward_time=0.294, loss_ctc=40.681, loss_att=39.551, acc=0.762, loss=39.890, backward_time=0.300, grad_norm=39.598, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.619e-04, train_time=1.135 +[gpub011:0/16] 2024-02-09 22:45:48,760 (trainer:762) INFO: 39epoch:train:2401-2500batch: iter_time=9.248e-05, forward_time=0.299, loss_ctc=41.337, loss_att=42.639, acc=0.785, loss=42.249, backward_time=0.304, grad_norm=37.969, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.619e-04, train_time=1.394 +[gpub011:0/16] 2024-02-09 22:46:08,789 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-09 22:46:28,403 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 22:46:31,921 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 22:46:31,921 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-09 22:46:31,925 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 22:52:34,011 (trainer:762) INFO: 39epoch:train:2501-2600batch: iter_time=2.917, forward_time=0.329, loss_ctc=49.726, loss_att=48.331, acc=0.751, loss=48.750, backward_time=0.303, grad_norm=45.894, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.619e-04, train_time=4.051 +[gpub011:0/16] 2024-02-09 22:54:40,598 (trainer:762) INFO: 39epoch:train:2601-2700batch: iter_time=9.423e-05, forward_time=0.386, loss_ctc=44.648, loss_att=39.264, acc=0.778, loss=40.879, backward_time=0.320, grad_norm=40.951, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.619e-04, train_time=1.267 +[gpub011:0/16] 2024-02-09 22:56:58,420 (trainer:762) INFO: 39epoch:train:2701-2800batch: iter_time=9.791e-05, forward_time=0.294, loss_ctc=51.768, loss_att=55.109, acc=0.751, loss=54.107, backward_time=0.316, grad_norm=53.660, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.619e-04, train_time=1.378 +[gpub011:0/16] 2024-02-09 22:59:00,680 (trainer:762) INFO: 
39epoch:train:2801-2900batch: iter_time=8.521e-05, forward_time=0.350, loss_ctc=52.141, loss_att=47.014, acc=0.752, loss=48.552, backward_time=0.334, grad_norm=49.741, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.618e-04, train_time=1.222 +[gpub011:0/16] 2024-02-09 23:00:59,797 (trainer:762) INFO: 39epoch:train:2901-3000batch: iter_time=8.234e-05, forward_time=0.289, loss_ctc=42.124, loss_att=39.565, acc=0.767, loss=40.333, backward_time=0.295, grad_norm=39.672, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.618e-04, train_time=1.192 +[gpub011:0/16] 2024-02-09 23:03:41,591 (trainer:762) INFO: 39epoch:train:3001-3100batch: iter_time=8.419e-05, forward_time=0.415, loss_ctc=48.894, loss_att=51.347, acc=0.733, loss=50.611, backward_time=0.317, grad_norm=45.520, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.618e-04, train_time=1.618 +[gpub011:0/16] 2024-02-09 23:05:33,660 (trainer:762) INFO: 39epoch:train:3101-3200batch: iter_time=8.268e-05, forward_time=0.289, loss_ctc=40.070, loss_att=42.753, acc=0.774, loss=41.948, backward_time=0.296, grad_norm=38.758, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.618e-04, train_time=1.120 +[gpub011:0/16] 2024-02-09 23:07:50,214 (trainer:762) INFO: 39epoch:train:3201-3300batch: iter_time=4.685e-04, forward_time=0.389, loss_ctc=46.966, loss_att=46.869, acc=0.759, loss=46.899, backward_time=0.346, grad_norm=42.042, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.618e-04, train_time=1.364 +[gpub011:0/16] 2024-02-09 23:09:55,854 (trainer:762) INFO: 39epoch:train:3301-3400batch: iter_time=9.047e-05, forward_time=0.295, loss_ctc=46.309, loss_att=37.251, acc=0.761, loss=39.968, backward_time=0.295, grad_norm=41.944, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.618e-04, train_time=1.257 +[gpub011:0/16] 2024-02-09 23:12:21,256 (trainer:762) INFO: 39epoch:train:3401-3500batch: 
iter_time=4.084e-04, forward_time=0.370, loss_ctc=47.382, loss_att=47.109, acc=0.760, loss=47.191, backward_time=0.306, grad_norm=51.913, clip=100.000, loss_scale=9.969e+33, optim_step_time=0.099, optim0_lr0=1.618e-04, train_time=1.454 +[gpub011:0/16] 2024-02-09 23:14:22,742 (trainer:762) INFO: 39epoch:train:3501-3600batch: iter_time=8.515e-05, forward_time=0.318, loss_ctc=38.266, loss_att=34.994, acc=0.773, loss=35.976, backward_time=0.304, grad_norm=35.513, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.617e-04, train_time=1.214 +[gpub011:0/16] 2024-02-09 23:16:36,766 (trainer:762) INFO: 39epoch:train:3601-3700batch: iter_time=3.908e-04, forward_time=0.336, loss_ctc=40.553, loss_att=45.029, acc=0.745, loss=43.686, backward_time=0.301, grad_norm=38.335, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.617e-04, train_time=1.341 +[gpub011:0/16] 2024-02-09 23:18:02,378 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub011:0/16] 2024-02-09 23:18:21,723 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 23:18:25,310 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 23:18:25,310 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-09 23:18:25,314 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 23:24:28,541 
(trainer:762) INFO: 39epoch:train:3701-3800batch: iter_time=3.349, forward_time=0.353, loss_ctc=43.912, loss_att=38.571, acc=0.786, loss=40.174, backward_time=0.312, grad_norm=39.527, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.617e-04, train_time=4.718 +[gpub011:0/16] 2024-02-09 23:26:26,464 (trainer:762) INFO: 39epoch:train:3801-3900batch: iter_time=7.888e-05, forward_time=0.291, loss_ctc=43.472, loss_att=44.953, acc=0.770, loss=44.508, backward_time=0.297, grad_norm=42.719, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.617e-04, train_time=1.178 +[gpub011:0/16] 2024-02-09 23:28:03,676 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-09 23:28:51,033 (trainer:762) INFO: 39epoch:train:3901-4000batch: iter_time=2.464e-04, forward_time=0.402, loss_ctc=46.487, loss_att=41.862, acc=0.786, loss=43.249, backward_time=0.321, grad_norm=39.849, clip=100.000, loss_scale=8.129e+33, optim_step_time=0.104, optim0_lr0=1.617e-04, train_time=1.446 +[gpub011:0/16] 2024-02-09 23:31:03,205 (trainer:762) INFO: 39epoch:train:4001-4100batch: iter_time=8.557e-05, forward_time=0.293, loss_ctc=56.488, loss_att=53.710, acc=0.758, loss=54.543, backward_time=0.300, grad_norm=51.973, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.617e-04, train_time=1.322 +[gpub011:0/16] 2024-02-09 23:33:09,542 (trainer:762) INFO: 39epoch:train:4101-4200batch: iter_time=8.163e-05, forward_time=0.291, loss_ctc=49.148, loss_att=47.614, acc=0.764, loss=48.074, backward_time=0.297, grad_norm=45.680, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.617e-04, train_time=1.262 +[gpub011:0/16] 2024-02-09 23:35:31,288 (trainer:762) INFO: 39epoch:train:4201-4300batch: iter_time=0.002, forward_time=0.432, loss_ctc=42.828, loss_att=42.926, acc=0.754, loss=42.897, backward_time=0.311, grad_norm=39.790, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, 
optim0_lr0=1.616e-04, train_time=1.418 +[gpub011:0/16] 2024-02-09 23:37:45,397 (trainer:762) INFO: 39epoch:train:4301-4400batch: iter_time=8.206e-05, forward_time=0.293, loss_ctc=45.862, loss_att=48.942, acc=0.759, loss=48.018, backward_time=0.297, grad_norm=42.802, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.616e-04, train_time=1.341 +[gpub011:0/16] 2024-02-09 23:39:42,286 (trainer:762) INFO: 39epoch:train:4401-4500batch: iter_time=3.285e-04, forward_time=0.321, loss_ctc=46.324, loss_att=47.257, acc=0.777, loss=46.977, backward_time=0.316, grad_norm=40.317, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.616e-04, train_time=1.169 +[gpub011:0/16] 2024-02-09 23:41:58,525 (trainer:762) INFO: 39epoch:train:4501-4600batch: iter_time=8.102e-05, forward_time=0.397, loss_ctc=40.517, loss_att=35.706, acc=0.769, loss=37.149, backward_time=0.311, grad_norm=37.215, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.616e-04, train_time=1.362 +[gpub011:0/16] 2024-02-09 23:44:28,284 (trainer:762) INFO: 39epoch:train:4601-4700batch: iter_time=8.132e-05, forward_time=0.291, loss_ctc=50.809, loss_att=46.738, acc=0.760, loss=47.959, backward_time=0.296, grad_norm=55.302, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.616e-04, train_time=1.498 +[gpub011:0/16] 2024-02-09 23:46:36,707 (trainer:762) INFO: 39epoch:train:4701-4800batch: iter_time=3.609e-04, forward_time=0.390, loss_ctc=38.764, loss_att=35.339, acc=0.779, loss=36.367, backward_time=0.321, grad_norm=35.955, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.128, optim0_lr0=1.616e-04, train_time=1.283 +[gpub011:0/16] 2024-02-09 23:48:36,922 (trainer:762) INFO: 39epoch:train:4801-4900batch: iter_time=8.214e-05, forward_time=0.288, loss_ctc=40.392, loss_att=39.220, acc=0.765, loss=39.571, backward_time=0.294, grad_norm=39.616, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.616e-04, train_time=1.203 
+[gpub011:0/16] 2024-02-09 23:50:47,623 (trainer:762) INFO: 39epoch:train:4901-5000batch: iter_time=8.224e-05, forward_time=0.291, loss_ctc=41.085, loss_att=43.153, acc=0.784, loss=42.533, backward_time=0.297, grad_norm=37.305, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.615e-04, train_time=1.307 +[gpub011:0/16] 2024-02-09 23:51:07,671 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub011:0/16] 2024-02-09 23:51:26,920 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-09 23:51:30,513 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-09 23:51:30,514 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-09 23:51:30,517 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-09 23:58:55,253 (trainer:762) INFO: 39epoch:train:5001-5100batch: iter_time=3.398, forward_time=0.454, loss_ctc=49.720, loss_att=45.565, acc=0.768, loss=46.811, backward_time=0.314, grad_norm=43.141, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.615e-04, train_time=4.875 +[gpub011:0/16] 2024-02-10 00:01:08,608 (trainer:762) INFO: 39epoch:train:5101-5200batch: iter_time=8.096e-05, forward_time=0.294, loss_ctc=43.664, loss_att=38.064, acc=0.789, loss=39.744, backward_time=0.297, grad_norm=39.215, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.093, optim0_lr0=1.615e-04, train_time=1.334 +[gpub011:0/16] 2024-02-10 00:03:27,524 (trainer:762) INFO: 39epoch:train:5201-5300batch: iter_time=8.429e-05, forward_time=0.294, loss_ctc=52.296, loss_att=55.019, acc=0.760, loss=54.202, backward_time=0.301, grad_norm=56.547, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.615e-04, train_time=1.388 +[gpub011:0/16] 2024-02-10 00:05:36,471 (trainer:762) INFO: 39epoch:train:5301-5400batch: iter_time=2.809e-04, forward_time=0.417, loss_ctc=51.391, loss_att=47.675, acc=0.765, loss=48.790, backward_time=0.324, grad_norm=52.090, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.615e-04, train_time=1.290 +[gpub011:0/16] 2024-02-10 00:07:56,038 (trainer:762) INFO: 39epoch:train:5401-5500batch: iter_time=8.928e-05, forward_time=0.290, loss_ctc=42.493, loss_att=39.968, acc=0.772, loss=40.725, backward_time=0.294, grad_norm=40.591, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.615e-04, train_time=1.394 +[gpub011:0/16] 2024-02-10 00:10:06,573 (trainer:762) INFO: 39epoch:train:5501-5600batch: iter_time=8.236e-05, forward_time=0.293, loss_ctc=48.590, loss_att=51.113, acc=0.742, loss=50.356, backward_time=0.298, grad_norm=44.894, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.615e-04, train_time=1.306 +[gpub011:0/16] 2024-02-10 00:12:17,014 (trainer:762) INFO: 39epoch:train:5601-5700batch: iter_time=8.987e-05, forward_time=0.409, loss_ctc=39.515, loss_att=41.704, acc=0.784, loss=41.047, backward_time=0.338, grad_norm=38.091, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.614e-04, train_time=1.303 +[gpub011:0/16] 2024-02-10 00:14:38,078 (trainer:762) INFO: 39epoch:train:5701-5800batch: iter_time=8.660e-05, forward_time=0.292, loss_ctc=46.401, loss_att=45.890, acc=0.769, loss=46.043, backward_time=0.298, grad_norm=39.626, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, 
optim0_lr0=1.614e-04, train_time=1.410 +[gpub011:0/16] 2024-02-10 00:16:48,501 (trainer:762) INFO: 39epoch:train:5801-5900batch: iter_time=8.497e-05, forward_time=0.291, loss_ctc=46.065, loss_att=37.028, acc=0.767, loss=39.739, backward_time=0.295, grad_norm=42.757, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.614e-04, train_time=1.305 +[gpub011:0/16] 2024-02-10 00:18:57,309 (trainer:762) INFO: 39epoch:train:5901-6000batch: iter_time=8.564e-05, forward_time=0.291, loss_ctc=47.108, loss_att=46.423, acc=0.765, loss=46.628, backward_time=0.297, grad_norm=50.020, clip=100.000, loss_scale=7.425e+33, optim_step_time=0.093, optim0_lr0=1.614e-04, train_time=1.288 +[gpub011:0/16] 2024-02-10 00:21:20,195 (trainer:762) INFO: 39epoch:train:6001-6100batch: iter_time=8.294e-05, forward_time=0.372, loss_ctc=38.293, loss_att=35.289, acc=0.776, loss=36.190, backward_time=0.400, grad_norm=38.596, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.614e-04, train_time=1.428 +[gpub011:0/16] 2024-02-10 00:22:00,647 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-10 00:23:33,256 (trainer:762) INFO: 39epoch:train:6101-6200batch: iter_time=8.357e-05, forward_time=0.291, loss_ctc=40.440, loss_att=44.462, acc=0.762, loss=43.256, backward_time=0.296, grad_norm=37.909, clip=100.000, loss_scale=7.080e+33, optim_step_time=0.093, optim0_lr0=1.614e-04, train_time=1.331 +[gpub011:0/16] 2024-02-10 00:24:50,319 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub011:0/16] 2024-02-10 00:25:09,939 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 00:25:13,520 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 00:25:13,520 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-10 00:25:13,524 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 00:31:11,232 (trainer:762) INFO: 39epoch:train:6201-6300batch: iter_time=3.334, forward_time=0.288, loss_ctc=43.378, loss_att=38.085, acc=0.791, loss=39.673, backward_time=0.295, grad_norm=39.915, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.614e-04, train_time=4.580 +[gpub011:0/16] 2024-02-10 00:33:12,837 (trainer:762) INFO: 39epoch:train:6301-6400batch: iter_time=8.088e-05, forward_time=0.355, loss_ctc=43.425, loss_att=45.955, acc=0.757, loss=45.196, backward_time=0.332, grad_norm=44.629, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.613e-04, train_time=1.215 +[gpub011:0/16] 2024-02-10 00:35:29,581 (trainer:762) INFO: 39epoch:train:6401-6500batch: iter_time=8.127e-05, forward_time=0.291, loss_ctc=46.433, loss_att=42.171, acc=0.780, loss=43.450, backward_time=0.298, grad_norm=39.177, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.613e-04, train_time=1.367 +[gpub011:0/16] 2024-02-10 00:37:46,088 (trainer:762) INFO: 
39epoch:train:6501-6600batch: iter_time=8.426e-05, forward_time=0.291, loss_ctc=56.168, loss_att=55.579, acc=0.749, loss=55.756, backward_time=0.298, grad_norm=52.556, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.613e-04, train_time=1.365 +[gpub011:0/16] 2024-02-10 00:40:13,868 (trainer:762) INFO: 39epoch:train:6601-6700batch: iter_time=3.600e-04, forward_time=0.407, loss_ctc=48.826, loss_att=46.997, acc=0.754, loss=47.546, backward_time=0.319, grad_norm=46.793, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.127, optim0_lr0=1.613e-04, train_time=1.478 +[gpub011:0/16] 2024-02-10 00:42:27,044 (trainer:762) INFO: 39epoch:train:6701-6800batch: iter_time=8.934e-05, forward_time=0.289, loss_ctc=42.549, loss_att=41.777, acc=0.758, loss=42.008, backward_time=0.294, grad_norm=37.333, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.613e-04, train_time=1.331 +[gpub011:0/16] 2024-02-10 00:44:29,006 (trainer:762) INFO: 39epoch:train:6801-6900batch: iter_time=8.788e-05, forward_time=0.290, loss_ctc=45.177, loss_att=49.419, acc=0.753, loss=48.147, backward_time=0.297, grad_norm=41.830, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.613e-04, train_time=1.219 +[gpub011:0/16] 2024-02-10 00:47:05,905 (trainer:762) INFO: 39epoch:train:6901-7000batch: iter_time=8.965e-05, forward_time=0.444, loss_ctc=45.801, loss_att=47.374, acc=0.770, loss=46.902, backward_time=0.331, grad_norm=39.927, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.613e-04, train_time=1.569 +[gpub011:0/16] 2024-02-10 00:49:11,623 (trainer:762) INFO: 39epoch:train:7001-7100batch: iter_time=8.766e-05, forward_time=0.289, loss_ctc=40.005, loss_att=36.195, acc=0.758, loss=37.338, backward_time=0.294, grad_norm=37.451, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.612e-04, train_time=1.257 +[gpub011:0/16] 2024-02-10 00:51:09,870 (trainer:762) INFO: 39epoch:train:7101-7200batch: 
iter_time=8.524e-05, forward_time=0.291, loss_ctc=50.013, loss_att=45.495, acc=0.766, loss=46.850, backward_time=0.297, grad_norm=55.571, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.612e-04, train_time=1.182 +[gpub011:0/16] 2024-02-10 00:53:42,890 (trainer:762) INFO: 39epoch:train:7201-7300batch: iter_time=8.686e-05, forward_time=0.288, loss_ctc=38.310, loss_att=34.873, acc=0.775, loss=35.904, backward_time=0.294, grad_norm=36.610, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.612e-04, train_time=1.530 +[gpub011:0/16] 2024-02-10 00:55:53,324 (trainer:762) INFO: 39epoch:train:7301-7400batch: iter_time=8.558e-05, forward_time=0.398, loss_ctc=39.837, loss_att=38.634, acc=0.762, loss=38.995, backward_time=0.325, grad_norm=40.684, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=1.612e-04, train_time=1.305 +[gpub011:0/16] 2024-02-10 00:58:11,508 (trainer:762) INFO: 39epoch:train:7401-7500batch: iter_time=8.506e-05, forward_time=0.289, loss_ctc=40.583, loss_att=43.676, acc=0.774, loss=42.748, backward_time=0.296, grad_norm=38.792, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.612e-04, train_time=1.382 +[gpub011:0/16] 2024-02-10 00:58:31,536 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-10 00:58:51,268 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 00:58:55,082 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 00:58:55,082 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-10 00:58:55,085 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 01:05:38,160 (trainer:762) INFO: 39epoch:train:7501-7600batch: iter_time=3.287, forward_time=0.358, loss_ctc=49.898, loss_att=45.335, acc=0.761, loss=46.704, backward_time=0.312, grad_norm=43.212, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.612e-04, train_time=4.466 +[gpub011:0/16] 2024-02-10 01:07:41,913 (trainer:762) INFO: 39epoch:train:7601-7700batch: iter_time=8.489e-05, forward_time=0.291, loss_ctc=43.472, loss_att=38.062, acc=0.782, loss=39.685, backward_time=0.297, grad_norm=39.839, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.612e-04, train_time=1.237 +[gpub011:0/16] 2024-02-10 01:10:18,513 (trainer:762) INFO: 39epoch:train:7701-7800batch: iter_time=8.651e-05, forward_time=0.400, loss_ctc=50.287, loss_att=53.690, acc=0.755, loss=52.669, backward_time=0.316, grad_norm=53.694, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.611e-04, train_time=1.566 +[gpub011:0/16] 2024-02-10 01:12:09,310 (trainer:762) INFO: 
39epoch:train:7801-7900batch: iter_time=8.750e-05, forward_time=0.289, loss_ctc=51.472, loss_att=46.125, acc=0.756, loss=47.729, backward_time=0.295, grad_norm=48.009, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.611e-04, train_time=1.108 +[gpub011:0/16] 2024-02-10 01:14:25,265 (trainer:762) INFO: 39epoch:train:7901-8000batch: iter_time=8.317e-05, forward_time=0.289, loss_ctc=42.377, loss_att=39.334, acc=0.769, loss=40.247, backward_time=0.295, grad_norm=39.788, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.611e-04, train_time=1.359 +[gpub011:0/16] 2024-02-10 01:16:56,588 (trainer:762) INFO: 39epoch:train:8001-8100batch: iter_time=8.065e-05, forward_time=0.341, loss_ctc=48.311, loss_att=51.337, acc=0.732, loss=50.429, backward_time=0.385, grad_norm=45.215, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.611e-04, train_time=1.513 +[gpub011:0/16] 2024-02-10 01:19:05,043 (trainer:762) INFO: 39epoch:train:8101-8200batch: iter_time=8.332e-05, forward_time=0.288, loss_ctc=39.574, loss_att=41.909, acc=0.778, loss=41.209, backward_time=0.295, grad_norm=39.146, clip=100.000, loss_scale=8.463e+33, optim_step_time=0.093, optim0_lr0=1.611e-04, train_time=1.284 +[gpub011:0/16] 2024-02-10 01:21:06,008 (trainer:762) INFO: 39epoch:train:8201-8300batch: iter_time=8.081e-05, forward_time=0.291, loss_ctc=45.816, loss_att=46.059, acc=0.763, loss=45.986, backward_time=0.298, grad_norm=49.489, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.611e-04, train_time=1.210 +[gpub011:0/16] 2024-02-10 01:23:50,492 (trainer:762) INFO: 39epoch:train:8301-8400batch: iter_time=4.294e-04, forward_time=0.399, loss_ctc=45.325, loss_att=36.954, acc=0.764, loss=39.466, backward_time=0.317, grad_norm=42.901, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.111, optim0_lr0=1.611e-04, train_time=1.645 +[gpub011:0/16] 2024-02-10 01:25:39,100 (trainer:762) INFO: 39epoch:train:8401-8500batch: 
iter_time=8.310e-05, forward_time=0.290, loss_ctc=45.823, loss_att=45.348, acc=0.764, loss=45.491, backward_time=0.297, grad_norm=50.671, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.611e-04, train_time=1.086 +[gpub011:0/16] 2024-02-10 01:27:42,999 (trainer:762) INFO: 39epoch:train:8501-8600batch: iter_time=8.115e-05, forward_time=0.290, loss_ctc=38.212, loss_att=34.960, acc=0.774, loss=35.935, backward_time=0.294, grad_norm=36.365, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.610e-04, train_time=1.238 +[gpub011:0/16] 2024-02-10 01:30:22,866 (trainer:762) INFO: 39epoch:train:8601-8700batch: iter_time=8.865e-05, forward_time=0.331, loss_ctc=40.436, loss_att=44.567, acc=0.749, loss=43.328, backward_time=0.351, grad_norm=38.036, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.610e-04, train_time=1.599 +[gpub011:0/16] 2024-02-10 01:31:41,117 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub011:0/16] 2024-02-10 01:32:00,530 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 01:32:04,106 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 01:32:04,106 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-10 01:32:04,165 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 01:38:09,898 
(trainer:762) INFO: 39epoch:train:8701-8800batch: iter_time=3.293, forward_time=0.288, loss_ctc=43.622, loss_att=38.667, acc=0.787, loss=40.154, backward_time=0.295, grad_norm=38.703, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.610e-04, train_time=4.670 +[gpub011:0/16] 2024-02-10 01:40:31,208 (trainer:762) INFO: 39epoch:train:8801-8900batch: iter_time=8.538e-05, forward_time=0.290, loss_ctc=43.122, loss_att=45.513, acc=0.769, loss=44.796, backward_time=0.297, grad_norm=45.582, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.610e-04, train_time=1.413 +[gpub011:0/16] 2024-02-10 01:43:08,336 (trainer:762) INFO: 39epoch:train:8901-9000batch: iter_time=1.016e-04, forward_time=0.384, loss_ctc=46.232, loss_att=42.040, acc=0.787, loss=43.298, backward_time=0.385, grad_norm=37.836, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.610e-04, train_time=1.571 +[gpub011:0/16] 2024-02-10 01:44:56,975 (trainer:762) INFO: 39epoch:train:9001-9100batch: iter_time=9.926e-05, forward_time=0.292, loss_ctc=54.939, loss_att=53.858, acc=0.758, loss=54.182, backward_time=0.300, grad_norm=52.518, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.610e-04, train_time=1.086 +[gpub011:0/16] 2024-02-10 01:47:04,743 (trainer:762) INFO: 39epoch:train:9101-9200batch: iter_time=9.150e-05, forward_time=0.292, loss_ctc=48.799, loss_att=47.748, acc=0.766, loss=48.063, backward_time=0.298, grad_norm=46.971, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.610e-04, train_time=1.277 +[gpub011:0/16] 2024-02-10 01:49:47,861 (trainer:762) INFO: 39epoch:train:9201-9300batch: iter_time=9.298e-05, forward_time=0.330, loss_ctc=41.723, loss_att=42.071, acc=0.758, loss=41.967, backward_time=0.360, grad_norm=39.474, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.609e-04, train_time=1.631 +[gpub011:0/16] 2024-02-10 01:51:57,816 (trainer:762) INFO: 
39epoch:train:9301-9400batch: iter_time=9.857e-05, forward_time=0.294, loss_ctc=45.408, loss_att=49.185, acc=0.761, loss=48.051, backward_time=0.298, grad_norm=42.634, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.609e-04, train_time=1.299 +[gpub011:0/16] 2024-02-10 01:54:08,928 (trainer:762) INFO: 39epoch:train:9401-9500batch: iter_time=8.879e-05, forward_time=0.294, loss_ctc=45.851, loss_att=47.388, acc=0.780, loss=46.927, backward_time=0.301, grad_norm=40.033, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.609e-04, train_time=1.311 +[gpub011:0/16] 2024-02-10 01:56:36,297 (trainer:762) INFO: 39epoch:train:9501-9600batch: iter_time=8.394e-05, forward_time=0.289, loss_ctc=40.140, loss_att=35.824, acc=0.769, loss=37.118, backward_time=0.294, grad_norm=41.767, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.609e-04, train_time=1.473 +[gpub011:0/16] 2024-02-10 01:58:46,000 (trainer:762) INFO: 39epoch:train:9601-9700batch: iter_time=9.301e-05, forward_time=0.382, loss_ctc=49.776, loss_att=46.609, acc=0.762, loss=47.559, backward_time=0.355, grad_norm=56.975, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.609e-04, train_time=1.297 +[gpub011:0/16] 2024-02-10 02:00:52,756 (trainer:762) INFO: 39epoch:train:9701-9800batch: iter_time=8.300e-05, forward_time=0.290, loss_ctc=38.582, loss_att=35.477, acc=0.782, loss=36.409, backward_time=0.297, grad_norm=36.842, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.609e-04, train_time=1.267 +[gpub011:0/16] 2024-02-10 02:03:22,859 (trainer:762) INFO: 39epoch:train:9801-9900batch: iter_time=9.055e-05, forward_time=0.289, loss_ctc=40.105, loss_att=39.095, acc=0.768, loss=39.398, backward_time=0.295, grad_norm=37.928, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.609e-04, train_time=1.501 +[gpub011:0/16] 2024-02-10 02:05:39,255 (trainer:762) INFO: 39epoch:train:9901-10000batch: 
iter_time=8.521e-05, forward_time=0.358, loss_ctc=40.820, loss_att=43.723, acc=0.786, loss=42.852, backward_time=0.355, grad_norm=36.299, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.608e-04, train_time=1.364 +[gpub011:0/16] 2024-02-10 02:05:59,284 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub011:0/16] 2024-02-10 02:06:19,072 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 02:06:22,642 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 02:06:22,642 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-10 02:06:22,673 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 02:13:18,313 (trainer:762) INFO: 39epoch:train:10001-10100batch: iter_time=3.287, forward_time=0.289, loss_ctc=48.725, loss_att=46.264, acc=0.758, loss=47.002, backward_time=0.295, grad_norm=45.010, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.608e-04, train_time=4.590 +[gpub011:0/16] 2024-02-10 02:14:50,780 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-10 02:15:55,528 (trainer:762) INFO: 39epoch:train:10101-10200batch: iter_time=8.386e-05, forward_time=0.290, loss_ctc=43.086, loss_att=38.495, acc=0.781, loss=39.872, backward_time=0.296, grad_norm=40.259, clip=100.000, loss_scale=1.374e+34, optim_step_time=0.095, optim0_lr0=1.608e-04, train_time=1.572 +[gpub011:0/16] 2024-02-10 02:18:03,366 (trainer:762) INFO: 39epoch:train:10201-10300batch: iter_time=3.089e-04, forward_time=0.359, loss_ctc=50.182, loss_att=53.452, acc=0.756, loss=52.471, backward_time=0.343, grad_norm=52.409, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.608e-04, train_time=1.278 +[gpub011:0/16] 2024-02-10 02:20:12,419 (trainer:762) INFO: 39epoch:train:10301-10400batch: iter_time=9.213e-05, forward_time=0.291, loss_ctc=51.420, loss_att=46.114, acc=0.757, loss=47.706, backward_time=0.298, grad_norm=48.864, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.608e-04, train_time=1.290 +[gpub011:0/16] 2024-02-10 02:22:37,210 (trainer:762) INFO: 39epoch:train:10401-10500batch: iter_time=9.090e-05, forward_time=0.289, loss_ctc=41.839, loss_att=39.505, acc=0.769, loss=40.205, backward_time=0.296, grad_norm=39.680, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.608e-04, train_time=1.448 +[gpub011:0/16] 2024-02-10 02:25:03,558 (trainer:762) INFO: 39epoch:train:10501-10600batch: iter_time=9.009e-05, forward_time=0.406, loss_ctc=47.637, loss_att=51.231, acc=0.733, loss=50.153, backward_time=0.319, grad_norm=79.526, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.608e-04, train_time=1.463 +[gpub011:0/16] 2024-02-10 02:25:37,726 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-10 02:26:56,398 (trainer:762) INFO: 39epoch:train:10601-10700batch: iter_time=9.002e-05, forward_time=0.291, loss_ctc=39.763, loss_att=42.403, acc=0.778, loss=41.611, backward_time=0.297, grad_norm=37.898, clip=100.000, loss_scale=6.713e+33, optim_step_time=0.093, optim0_lr0=1.607e-04, train_time=1.128 +[gpub011:0/16] 2024-02-10 02:29:39,552 (trainer:762) INFO: 39epoch:train:10701-10800batch: iter_time=8.437e-05, forward_time=0.294, loss_ctc=46.061, loss_att=46.213, acc=0.762, loss=46.167, backward_time=0.296, grad_norm=40.943, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.607e-04, train_time=1.631 +[gpub011:0/16] 2024-02-10 02:31:28,105 (trainer:762) INFO: 39epoch:train:10801-10900batch: iter_time=8.936e-05, forward_time=0.289, loss_ctc=45.310, loss_att=36.823, acc=0.765, loss=39.369, backward_time=0.295, grad_norm=43.039, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.607e-04, train_time=1.085 +[gpub011:0/16] 2024-02-10 02:33:57,266 (trainer:762) INFO: 39epoch:train:10901-11000batch: iter_time=9.268e-05, forward_time=0.421, loss_ctc=45.950, loss_att=45.110, acc=0.764, loss=45.362, backward_time=0.339, grad_norm=51.738, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.607e-04, train_time=1.491 +[gpub011:0/16] 2024-02-10 02:36:20,823 (trainer:762) INFO: 39epoch:train:11001-11100batch: iter_time=8.907e-05, forward_time=0.287, loss_ctc=37.805, loss_att=34.720, acc=0.775, loss=35.646, backward_time=0.291, grad_norm=37.844, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.607e-04, train_time=1.435 +[gpub011:0/16] 2024-02-10 02:38:28,657 (trainer:762) INFO: 39epoch:train:11101-11200batch: iter_time=8.846e-05, forward_time=0.289, loss_ctc=39.463, loss_att=43.999, acc=0.752, loss=42.638, backward_time=0.295, grad_norm=37.187, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.607e-04, train_time=1.278 +[gpub011:0/16] 2024-02-10 
02:40:08,240 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub011:0/16] 2024-02-10 02:40:27,728 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 02:40:31,302 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 02:40:31,302 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-10 02:40:31,305 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 02:46:20,494 (trainer:762) INFO: 39epoch:train:11201-11300batch: iter_time=3.224, forward_time=0.403, loss_ctc=43.297, loss_att=38.125, acc=0.789, loss=39.677, backward_time=0.317, grad_norm=39.042, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.607e-04, train_time=4.718 +[gpub011:0/16] 2024-02-10 02:47:51,281 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-10 02:48:08,780 (trainer:762) INFO: 39epoch:train:11301-11400batch: iter_time=9.003e-05, forward_time=0.291, loss_ctc=43.075, loss_att=45.003, acc=0.773, loss=44.425, backward_time=0.298, grad_norm=40.920, clip=100.000, loss_scale=4.773e+33, optim_step_time=0.093, optim0_lr0=1.606e-04, train_time=1.083 +[gpub011:0/16] 2024-02-10 02:50:34,157 (trainer:762) INFO: 39epoch:train:11401-11500batch: iter_time=9.390e-05, forward_time=0.293, loss_ctc=45.888, loss_att=41.732, acc=0.788, loss=42.979, backward_time=0.298, grad_norm=39.903, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.606e-04, train_time=1.453 +[gpub011:0/16] 2024-02-10 02:53:08,851 (trainer:762) INFO: 39epoch:train:11501-11600batch: iter_time=9.612e-05, forward_time=0.389, loss_ctc=55.454, loss_att=52.495, acc=0.764, loss=53.383, backward_time=0.338, grad_norm=52.833, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.606e-04, train_time=1.547 +[gpub011:0/16] 2024-02-10 02:55:22,089 (trainer:762) INFO: 39epoch:train:11601-11700batch: iter_time=8.145e-05, forward_time=0.291, loss_ctc=48.482, loss_att=48.088, acc=0.766, loss=48.207, backward_time=0.298, grad_norm=46.449, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.606e-04, train_time=1.332 +[gpub011:0/16] 2024-02-10 02:57:26,040 (trainer:762) INFO: 39epoch:train:11701-11800batch: iter_time=9.534e-05, forward_time=0.293, loss_ctc=42.008, loss_att=42.172, acc=0.760, loss=42.123, backward_time=0.296, grad_norm=39.371, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.606e-04, train_time=1.239 +[gpub011:0/16] 2024-02-10 03:00:04,830 (trainer:762) INFO: 39epoch:train:11801-11900batch: iter_time=8.849e-05, forward_time=0.368, loss_ctc=45.263, loss_att=49.403, acc=0.758, loss=48.161, backward_time=0.328, grad_norm=44.127, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.606e-04, train_time=1.588 +[gpub011:0/16] 2024-02-10 
03:02:11,900 (trainer:762) INFO: 39epoch:train:11901-12000batch: iter_time=9.022e-05, forward_time=0.294, loss_ctc=45.444, loss_att=47.322, acc=0.780, loss=46.758, backward_time=0.300, grad_norm=44.455, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.606e-04, train_time=1.271 +[gpub011:0/16] 2024-02-10 03:04:42,785 (trainer:762) INFO: 39epoch:train:12001-12100batch: iter_time=9.363e-05, forward_time=0.290, loss_ctc=39.766, loss_att=35.540, acc=0.769, loss=36.808, backward_time=0.293, grad_norm=38.884, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.606e-04, train_time=1.508 +[gpub011:0/16] 2024-02-10 03:06:40,270 (trainer:762) INFO: 39epoch:train:12101-12200batch: iter_time=9.078e-05, forward_time=0.292, loss_ctc=49.272, loss_att=45.439, acc=0.768, loss=46.589, backward_time=0.297, grad_norm=51.836, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.605e-04, train_time=1.175 +[gpub011:0/16] 2024-02-10 03:09:17,512 (trainer:762) INFO: 39epoch:train:12201-12300batch: iter_time=8.569e-05, forward_time=0.394, loss_ctc=38.515, loss_att=34.872, acc=0.785, loss=35.965, backward_time=0.318, grad_norm=34.650, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.605e-04, train_time=1.572 +[gpub011:0/16] 2024-02-10 03:11:18,619 (trainer:762) INFO: 39epoch:train:12301-12400batch: iter_time=8.624e-05, forward_time=0.288, loss_ctc=39.892, loss_att=38.967, acc=0.768, loss=39.245, backward_time=0.294, grad_norm=37.626, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.605e-04, train_time=1.211 +[gpub011:0/16] 2024-02-10 03:13:30,272 (trainer:762) INFO: 39epoch:train:12401-12500batch: iter_time=8.769e-05, forward_time=0.293, loss_ctc=40.496, loss_att=42.266, acc=0.790, loss=41.735, backward_time=0.298, grad_norm=35.976, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.605e-04, train_time=1.316 +[gpub011:0/16] 2024-02-10 03:13:50,301 
(multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub011:0/16] 2024-02-10 03:14:10,338 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 03:14:13,929 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 03:14:13,929 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-10 03:14:13,933 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 03:20:50,611 (trainer:762) INFO: 39epoch:train:12501-12600batch: iter_time=3.145, forward_time=0.385, loss_ctc=49.039, loss_att=46.908, acc=0.757, loss=47.547, backward_time=0.315, grad_norm=45.026, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.605e-04, train_time=4.403 +[gpub011:0/16] 2024-02-10 03:23:05,260 (trainer:762) INFO: 39epoch:train:12601-12700batch: iter_time=8.251e-05, forward_time=0.291, loss_ctc=42.780, loss_att=38.192, acc=0.782, loss=39.569, backward_time=0.295, grad_norm=38.984, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.605e-04, train_time=1.346 +[gpub011:0/16] 2024-02-10 03:25:33,355 (trainer:762) INFO: 39epoch:train:12701-12800batch: iter_time=8.782e-05, forward_time=0.324, loss_ctc=49.680, loss_att=53.779, acc=0.756, loss=52.549, backward_time=0.347, grad_norm=50.333, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.605e-04, 
train_time=1.481 +[gpub011:0/16] 2024-02-10 03:27:35,522 (trainer:762) INFO: 39epoch:train:12801-12900batch: iter_time=9.367e-05, forward_time=0.307, loss_ctc=50.877, loss_att=46.212, acc=0.756, loss=47.611, backward_time=0.307, grad_norm=50.083, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.604e-04, train_time=1.221 +[gpub011:0/16] 2024-02-10 03:30:08,251 (trainer:762) INFO: 39epoch:train:12901-13000batch: iter_time=9.151e-05, forward_time=0.313, loss_ctc=42.000, loss_att=39.024, acc=0.771, loss=39.917, backward_time=0.293, grad_norm=38.674, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.604e-04, train_time=1.527 +[gpub011:0/16] 2024-02-10 03:31:57,688 (trainer:762) INFO: 39epoch:train:13001-13100batch: iter_time=9.072e-05, forward_time=0.292, loss_ctc=48.088, loss_att=50.554, acc=0.736, loss=49.814, backward_time=0.299, grad_norm=46.664, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.604e-04, train_time=1.094 +[gpub011:0/16] 2024-02-10 03:34:34,701 (trainer:762) INFO: 39epoch:train:13101-13200batch: iter_time=9.594e-05, forward_time=0.355, loss_ctc=39.473, loss_att=42.287, acc=0.777, loss=41.443, backward_time=0.334, grad_norm=37.124, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.604e-04, train_time=1.569 +[gpub011:0/16] 2024-02-10 03:36:44,400 (trainer:762) INFO: 39epoch:train:13201-13300batch: iter_time=8.518e-05, forward_time=0.292, loss_ctc=45.794, loss_att=46.033, acc=0.761, loss=45.961, backward_time=0.299, grad_norm=40.116, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.604e-04, train_time=1.298 +[gpub011:0/16] 2024-02-10 03:38:50,840 (trainer:762) INFO: 39epoch:train:13301-13400batch: iter_time=8.645e-05, forward_time=0.288, loss_ctc=45.277, loss_att=36.886, acc=0.767, loss=39.403, backward_time=0.294, grad_norm=41.921, clip=100.000, loss_scale=3.012e+33, optim_step_time=0.093, optim0_lr0=1.604e-04, train_time=1.264 
+[gpub011:0/16] 2024-02-10 03:41:23,067 (trainer:762) INFO: 39epoch:train:13401-13500batch: iter_time=2.546e-04, forward_time=0.371, loss_ctc=45.652, loss_att=45.442, acc=0.764, loss=45.505, backward_time=0.321, grad_norm=52.128, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.604e-04, train_time=1.522 +[gpub011:0/16] 2024-02-10 03:43:31,018 (trainer:762) INFO: 39epoch:train:13501-13600batch: iter_time=8.595e-05, forward_time=0.304, loss_ctc=37.617, loss_att=34.310, acc=0.777, loss=35.302, backward_time=0.298, grad_norm=36.949, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.603e-04, train_time=1.278 +[gpub011:0/16] 2024-02-10 03:45:36,008 (trainer:762) INFO: 39epoch:train:13601-13700batch: iter_time=9.307e-05, forward_time=0.292, loss_ctc=39.690, loss_att=44.270, acc=0.750, loss=42.896, backward_time=0.296, grad_norm=39.588, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.603e-04, train_time=1.251 +[gpub011:0/16] 2024-02-10 03:47:04,174 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-10 03:47:24,270 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 03:47:27,858 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 03:47:27,858 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-10 03:47:27,863 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 03:53:25,625 (trainer:762) INFO: 39epoch:train:13701-13800batch: iter_time=3.351, forward_time=0.369, loss_ctc=43.168, loss_att=37.323, acc=0.791, loss=39.077, backward_time=0.309, grad_norm=39.851, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.603e-04, train_time=4.696 +[gpub011:0/16] 2024-02-10 03:55:29,821 (trainer:762) INFO: 39epoch:train:13801-13900batch: iter_time=7.898e-05, forward_time=0.289, loss_ctc=42.685, loss_att=43.863, acc=0.762, loss=43.510, backward_time=0.294, grad_norm=42.606, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.603e-04, train_time=1.241 +[gpub011:0/16] 2024-02-10 03:57:55,214 (trainer:762) INFO: 39epoch:train:13901-14000batch: iter_time=8.020e-05, forward_time=0.292, loss_ctc=46.580, loss_att=41.624, acc=0.782, loss=43.111, backward_time=0.297, grad_norm=39.678, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.603e-04, train_time=1.455 +[gpub011:0/16] 2024-02-10 04:00:08,434 (trainer:762) 
INFO: 39epoch:train:14001-14100batch: iter_time=8.190e-05, forward_time=0.374, loss_ctc=55.530, loss_att=54.878, acc=0.753, loss=55.074, backward_time=0.319, grad_norm=54.883, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.603e-04, train_time=1.332 +[gpub011:0/16] 2024-02-10 04:02:41,470 (trainer:762) INFO: 39epoch:train:14101-14200batch: iter_time=8.147e-05, forward_time=0.290, loss_ctc=47.886, loss_att=45.913, acc=0.758, loss=46.505, backward_time=0.294, grad_norm=47.388, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.603e-04, train_time=1.529 +[gpub011:0/16] 2024-02-10 04:04:36,726 (trainer:762) INFO: 39epoch:train:14201-14300batch: iter_time=7.910e-05, forward_time=0.291, loss_ctc=42.109, loss_att=42.358, acc=0.757, loss=42.283, backward_time=0.295, grad_norm=39.799, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.603e-04, train_time=1.154 +[gpub011:0/16] 2024-02-10 04:07:04,421 (trainer:762) INFO: 39epoch:train:14301-14400batch: iter_time=8.043e-05, forward_time=0.358, loss_ctc=44.918, loss_att=49.361, acc=0.751, loss=48.028, backward_time=0.354, grad_norm=42.519, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.602e-04, train_time=1.477 +[gpub011:0/16] 2024-02-10 04:09:26,568 (trainer:762) INFO: 39epoch:train:14401-14500batch: iter_time=8.183e-05, forward_time=0.293, loss_ctc=45.725, loss_att=47.083, acc=0.773, loss=46.675, backward_time=0.298, grad_norm=41.187, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.602e-04, train_time=1.420 +[gpub011:0/16] 2024-02-10 04:11:37,801 (trainer:762) INFO: 39epoch:train:14501-14600batch: iter_time=0.001, forward_time=0.373, loss_ctc=39.647, loss_att=36.005, acc=0.760, loss=37.098, backward_time=0.365, grad_norm=38.810, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.112, optim0_lr0=1.602e-04, train_time=1.314 +[gpub011:0/16] 2024-02-10 04:13:54,282 (trainer:762) INFO: 
39epoch:train:14601-14700batch: iter_time=8.217e-05, forward_time=0.290, loss_ctc=49.896, loss_att=46.064, acc=0.763, loss=47.214, backward_time=0.295, grad_norm=53.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.602e-04, train_time=1.364 +[gpub011:0/16] 2024-02-10 04:16:07,508 (trainer:762) INFO: 39epoch:train:14701-14800batch: iter_time=8.027e-05, forward_time=0.289, loss_ctc=38.358, loss_att=34.267, acc=0.777, loss=35.494, backward_time=0.292, grad_norm=36.361, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.602e-04, train_time=1.333 +[gpub011:0/16] 2024-02-10 04:18:28,426 (trainer:762) INFO: 39epoch:train:14801-14900batch: iter_time=8.013e-05, forward_time=0.402, loss_ctc=39.452, loss_att=38.392, acc=0.766, loss=38.710, backward_time=0.318, grad_norm=36.665, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.602e-04, train_time=1.409 +[gpub011:0/16] 2024-02-10 04:20:31,345 (trainer:762) INFO: 39epoch:train:14901-15000batch: iter_time=7.903e-05, forward_time=0.291, loss_ctc=40.396, loss_att=43.137, acc=0.777, loss=42.315, backward_time=0.297, grad_norm=38.726, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.602e-04, train_time=1.229 +[gpub011:0/16] 2024-02-10 04:58:50,649 (trainer:361) INFO: 39epoch results: [train] iter_time=0.258, forward_time=0.323, loss_ctc=45.121, loss_att=43.705, acc=0.766, loss=44.130, backward_time=0.309, grad_norm=43.358, clip=100.000, loss_scale=6.470e+33, optim_step_time=0.096, optim0_lr0=1.612e-04, train_time=1.595, time=6 hours, 39 minutes and 5.81 seconds, total_count=615000, gpu_max_cached_mem_GB=43.805, [valid] loss_ctc=36.305, cer_ctc=0.183, loss_att=39.322, acc=0.689, cer=0.319, wer=0.988, loss=38.417, time=37 minutes and 54.25 seconds, total_count=191511, gpu_max_cached_mem_GB=43.805 +[gpub011:0/16] 2024-02-10 04:59:00,298 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub011:0/16] 2024-02-10 
04:59:00,411 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/31epoch.pth +[gpub011:0/16] 2024-02-10 04:59:00,411 (trainer:290) INFO: 40/45epoch started. Estimated time to finish: 1 day, 20 hours and 9 minutes +[gpub011:0/16] 2024-02-10 04:59:00,421 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-10 04:59:18,877 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 04:59:22,323 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 04:59:22,323 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-10 04:59:22,326 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 05:05:48,189 (trainer:762) INFO: 40epoch:train:1-100batch: iter_time=2.960, forward_time=0.321, loss_ctc=47.859, loss_att=39.866, acc=0.778, loss=42.264, backward_time=0.302, grad_norm=43.408, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.601e-04, train_time=4.077 +[gpub011:0/16] 2024-02-10 05:07:44,743 (trainer:762) INFO: 40epoch:train:101-200batch: iter_time=8.501e-05, forward_time=0.290, loss_ctc=44.612, loss_att=49.821, acc=0.743, loss=48.258, backward_time=0.298, grad_norm=53.263, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.601e-04, 
train_time=1.165 +[gpub011:0/16] 2024-02-10 05:09:56,898 (trainer:762) INFO: 40epoch:train:201-300batch: iter_time=9.110e-05, forward_time=0.297, loss_ctc=40.028, loss_att=35.508, acc=0.781, loss=36.864, backward_time=0.304, grad_norm=39.600, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.601e-04, train_time=1.321 +[gpub011:0/16] 2024-02-10 05:12:18,491 (trainer:762) INFO: 40epoch:train:301-400batch: iter_time=9.413e-05, forward_time=0.347, loss_ctc=57.058, loss_att=55.220, acc=0.736, loss=55.771, backward_time=0.356, grad_norm=55.957, clip=100.000, loss_scale=6.023e+33, optim_step_time=0.100, optim0_lr0=1.601e-04, train_time=1.416 +[gpub011:0/16] 2024-02-10 05:14:26,094 (trainer:762) INFO: 40epoch:train:401-500batch: iter_time=9.017e-05, forward_time=0.290, loss_ctc=51.434, loss_att=50.801, acc=0.741, loss=50.991, backward_time=0.296, grad_norm=48.900, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.601e-04, train_time=1.276 +[gpub011:0/16] 2024-02-10 05:16:31,408 (trainer:762) INFO: 40epoch:train:501-600batch: iter_time=9.055e-05, forward_time=0.314, loss_ctc=46.312, loss_att=44.663, acc=0.786, loss=45.158, backward_time=0.313, grad_norm=39.673, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.601e-04, train_time=1.253 +[gpub011:0/16] 2024-02-10 05:19:01,160 (trainer:762) INFO: 40epoch:train:601-700batch: iter_time=8.549e-05, forward_time=0.330, loss_ctc=50.798, loss_att=49.519, acc=0.761, loss=49.902, backward_time=0.333, grad_norm=50.091, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.601e-04, train_time=1.497 +[gpub011:0/16] 2024-02-10 05:21:00,372 (trainer:762) INFO: 40epoch:train:701-800batch: iter_time=8.548e-05, forward_time=0.292, loss_ctc=45.749, loss_att=45.250, acc=0.763, loss=45.400, backward_time=0.299, grad_norm=43.157, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.600e-04, train_time=1.192 +[gpub011:0/16] 2024-02-10 
05:21:37,902 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-10 05:23:28,288 (trainer:762) INFO: 40epoch:train:801-900batch: iter_time=8.541e-05, forward_time=0.379, loss_ctc=45.896, loss_att=41.039, acc=0.769, loss=42.496, backward_time=0.369, grad_norm=41.588, clip=100.000, loss_scale=6.661e+33, optim_step_time=0.100, optim0_lr0=1.600e-04, train_time=1.479 +[gpub011:0/16] 2024-02-10 05:25:28,280 (trainer:762) INFO: 40epoch:train:901-1000batch: iter_time=8.864e-05, forward_time=0.293, loss_ctc=54.202, loss_att=54.408, acc=0.743, loss=54.346, backward_time=0.300, grad_norm=55.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.600e-04, train_time=1.200 +[gpub011:0/16] 2024-02-10 05:27:53,511 (trainer:762) INFO: 40epoch:train:1001-1100batch: iter_time=8.846e-05, forward_time=0.297, loss_ctc=51.177, loss_att=48.684, acc=0.758, loss=49.432, backward_time=0.304, grad_norm=48.664, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.600e-04, train_time=1.452 +[gpub011:0/16] 2024-02-10 05:30:04,769 (trainer:762) INFO: 40epoch:train:1101-1200batch: iter_time=2.671e-04, forward_time=0.373, loss_ctc=49.649, loss_att=47.832, acc=0.760, loss=48.377, backward_time=0.350, grad_norm=43.589, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.600e-04, train_time=1.312 +[gpub011:0/16] 2024-02-10 05:31:25,943 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-10 05:31:45,508 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 05:31:49,033 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 05:31:49,033 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-10 05:31:49,036 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 05:37:31,677 (trainer:762) INFO: 40epoch:train:1201-1300batch: iter_time=3.173, forward_time=0.287, loss_ctc=43.020, loss_att=40.580, acc=0.772, loss=41.312, backward_time=0.293, grad_norm=40.874, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.600e-04, train_time=4.469 +[gpub011:0/16] 2024-02-10 05:39:22,219 (trainer:762) INFO: 40epoch:train:1301-1400batch: iter_time=7.902e-05, forward_time=0.291, loss_ctc=45.839, loss_att=47.758, acc=0.755, loss=47.182, backward_time=0.299, grad_norm=47.553, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.600e-04, train_time=1.105 +[gpub011:0/16] 2024-02-10 05:41:50,013 (trainer:762) INFO: 40epoch:train:1401-1500batch: iter_time=8.607e-05, forward_time=0.376, loss_ctc=41.367, loss_att=40.148, acc=0.755, loss=40.514, backward_time=0.315, grad_norm=48.052, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.599e-04, train_time=1.478 +[gpub011:0/16] 2024-02-10 05:43:57,490 (trainer:762) INFO: 
40epoch:train:1501-1600batch: iter_time=8.410e-05, forward_time=0.299, loss_ctc=42.765, loss_att=40.158, acc=0.761, loss=40.940, backward_time=0.297, grad_norm=41.716, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.599e-04, train_time=1.274 +[gpub011:0/16] 2024-02-10 05:45:53,781 (trainer:762) INFO: 40epoch:train:1601-1700batch: iter_time=8.217e-05, forward_time=0.311, loss_ctc=49.072, loss_att=51.427, acc=0.746, loss=50.720, backward_time=0.296, grad_norm=51.528, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.599e-04, train_time=1.163 +[gpub011:0/16] 2024-02-10 05:48:01,828 (trainer:762) INFO: 40epoch:train:1701-1800batch: iter_time=8.115e-05, forward_time=0.299, loss_ctc=53.802, loss_att=47.505, acc=0.748, loss=49.394, backward_time=0.301, grad_norm=50.240, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.599e-04, train_time=1.280 +[gpub011:0/16] 2024-02-10 05:50:30,627 (trainer:762) INFO: 40epoch:train:1801-1900batch: iter_time=8.987e-05, forward_time=0.355, loss_ctc=49.631, loss_att=48.324, acc=0.766, loss=48.716, backward_time=0.350, grad_norm=44.273, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.599e-04, train_time=1.487 +[gpub011:0/16] 2024-02-10 05:52:39,323 (trainer:762) INFO: 40epoch:train:1901-2000batch: iter_time=8.699e-05, forward_time=0.290, loss_ctc=43.836, loss_att=42.679, acc=0.768, loss=43.026, backward_time=0.296, grad_norm=37.103, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.599e-04, train_time=1.286 +[gpub011:0/16] 2024-02-10 05:54:30,468 (trainer:762) INFO: 40epoch:train:2001-2100batch: iter_time=8.920e-05, forward_time=0.292, loss_ctc=44.581, loss_att=39.988, acc=0.770, loss=41.366, backward_time=0.299, grad_norm=40.405, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.599e-04, train_time=1.112 +[gpub011:0/16] 2024-02-10 05:56:51,817 (trainer:762) INFO: 40epoch:train:2101-2200batch: 
iter_time=9.374e-05, forward_time=0.307, loss_ctc=47.843, loss_att=45.968, acc=0.758, loss=46.530, backward_time=0.314, grad_norm=45.330, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.599e-04, train_time=1.413 +[gpub011:0/16] 2024-02-10 05:59:06,558 (trainer:762) INFO: 40epoch:train:2201-2300batch: iter_time=2.226e-04, forward_time=0.346, loss_ctc=51.285, loss_att=49.553, acc=0.739, loss=50.073, backward_time=0.310, grad_norm=55.799, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.598e-04, train_time=1.347 +[gpub011:0/16] 2024-02-10 06:01:12,799 (trainer:762) INFO: 40epoch:train:2301-2400batch: iter_time=8.177e-05, forward_time=0.294, loss_ctc=51.448, loss_att=51.157, acc=0.753, loss=51.244, backward_time=0.300, grad_norm=45.269, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.598e-04, train_time=1.262 +[gpub011:0/16] 2024-02-10 06:03:19,530 (trainer:762) INFO: 40epoch:train:2401-2500batch: iter_time=8.089e-05, forward_time=0.290, loss_ctc=43.890, loss_att=44.249, acc=0.766, loss=44.141, backward_time=0.296, grad_norm=41.920, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.598e-04, train_time=1.268 +[gpub011:0/16] 2024-02-10 06:03:39,571 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-10 06:03:59,013 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 06:04:02,546 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 06:04:02,547 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-10 06:04:02,550 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 06:10:22,097 (trainer:762) INFO: 40epoch:train:2501-2600batch: iter_time=2.902, forward_time=0.416, loss_ctc=45.764, loss_att=37.583, acc=0.777, loss=40.037, backward_time=0.313, grad_norm=40.886, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.598e-04, train_time=4.225 +[gpub011:0/16] 2024-02-10 06:12:25,437 (trainer:762) INFO: 40epoch:train:2601-2700batch: iter_time=7.925e-05, forward_time=0.290, loss_ctc=42.692, loss_att=45.899, acc=0.742, loss=44.937, backward_time=0.295, grad_norm=48.220, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.598e-04, train_time=1.233 +[gpub011:0/16] 2024-02-10 06:14:33,242 (trainer:762) INFO: 40epoch:train:2701-2800batch: iter_time=8.166e-05, forward_time=0.288, loss_ctc=39.367, loss_att=34.455, acc=0.782, loss=35.929, backward_time=0.294, grad_norm=37.537, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.598e-04, train_time=1.278 +[gpub011:0/16] 2024-02-10 06:16:51,544 (trainer:762) INFO: 
40epoch:train:2801-2900batch: iter_time=8.312e-05, forward_time=0.335, loss_ctc=53.517, loss_att=54.230, acc=0.733, loss=54.016, backward_time=0.308, grad_norm=52.410, clip=100.000, loss_scale=8.879e+33, optim_step_time=0.094, optim0_lr0=1.598e-04, train_time=1.383 +[gpub011:0/16] 2024-02-10 06:17:55,290 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-10 06:19:07,140 (trainer:762) INFO: 40epoch:train:2901-3000batch: iter_time=8.458e-05, forward_time=0.378, loss_ctc=49.251, loss_att=47.635, acc=0.744, loss=48.120, backward_time=0.301, grad_norm=50.665, clip=100.000, loss_scale=7.395e+33, optim_step_time=0.095, optim0_lr0=1.597e-04, train_time=1.356 +[gpub011:0/16] 2024-02-10 06:21:20,686 (trainer:762) INFO: 40epoch:train:3001-3100batch: iter_time=8.486e-05, forward_time=0.292, loss_ctc=45.515, loss_att=44.082, acc=0.780, loss=44.512, backward_time=0.298, grad_norm=39.068, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.597e-04, train_time=1.335 +[gpub011:0/16] 2024-02-10 06:23:41,170 (trainer:762) INFO: 40epoch:train:3101-3200batch: iter_time=8.225e-05, forward_time=0.290, loss_ctc=49.093, loss_att=47.347, acc=0.760, loss=47.871, backward_time=0.295, grad_norm=43.774, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.597e-04, train_time=1.404 +[gpub011:0/16] 2024-02-10 06:25:47,756 (trainer:762) INFO: 40epoch:train:3201-3300batch: iter_time=8.370e-05, forward_time=0.376, loss_ctc=44.717, loss_att=44.017, acc=0.765, loss=44.227, backward_time=0.370, grad_norm=41.586, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.597e-04, train_time=1.267 +[gpub011:0/16] 2024-02-10 06:28:07,374 (trainer:762) INFO: 40epoch:train:3301-3400batch: iter_time=9.391e-05, forward_time=0.290, loss_ctc=43.938, loss_att=39.412, acc=0.766, loss=40.769, backward_time=0.294, grad_norm=39.830, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.597e-04, 
train_time=1.396 +[gpub011:0/16] 2024-02-10 06:30:15,702 (trainer:762) INFO: 40epoch:train:3401-3500batch: iter_time=9.092e-05, forward_time=0.291, loss_ctc=52.731, loss_att=52.187, acc=0.747, loss=52.350, backward_time=0.298, grad_norm=57.639, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.597e-04, train_time=1.283 +[gpub011:0/16] 2024-02-10 06:32:10,245 (trainer:762) INFO: 40epoch:train:3501-3600batch: iter_time=8.536e-05, forward_time=0.292, loss_ctc=50.160, loss_att=46.981, acc=0.750, loss=47.935, backward_time=0.299, grad_norm=46.091, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.597e-04, train_time=1.144 +[gpub011:0/16] 2024-02-10 06:34:33,519 (trainer:762) INFO: 40epoch:train:3601-3700batch: iter_time=9.227e-05, forward_time=0.419, loss_ctc=48.352, loss_att=46.646, acc=0.764, loss=47.158, backward_time=0.322, grad_norm=42.479, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.597e-04, train_time=1.434 +[gpub011:0/16] 2024-02-10 06:35:51,784 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub011:0/16] 2024-02-10 06:36:11,322 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 06:36:15,091 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 06:36:15,092 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-10 06:36:15,095 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 06:41:46,746 (trainer:762) INFO: 40epoch:train:3701-3800batch: iter_time=3.081, forward_time=0.288, loss_ctc=42.204, loss_att=39.331, acc=0.772, loss=40.193, backward_time=0.295, grad_norm=41.185, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.596e-04, train_time=4.332 +[gpub011:0/16] 2024-02-10 06:43:53,060 (trainer:762) INFO: 40epoch:train:3801-3900batch: iter_time=7.947e-05, forward_time=0.291, loss_ctc=46.097, loss_att=46.494, acc=0.759, loss=46.375, backward_time=0.297, grad_norm=44.665, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.596e-04, train_time=1.263 +[gpub011:0/16] 2024-02-10 06:45:59,166 (trainer:762) INFO: 40epoch:train:3901-4000batch: iter_time=7.865e-05, forward_time=0.410, loss_ctc=41.259, loss_att=39.380, acc=0.758, loss=39.944, backward_time=0.314, grad_norm=45.143, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.596e-04, train_time=1.261 +[gpub011:0/16] 2024-02-10 06:48:07,849 (trainer:762) INFO: 
40epoch:train:4001-4100batch: iter_time=8.177e-05, forward_time=0.289, loss_ctc=42.500, loss_att=39.855, acc=0.762, loss=40.648, backward_time=0.294, grad_norm=40.939, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.596e-04, train_time=1.287 +[gpub011:0/16] 2024-02-10 06:50:14,328 (trainer:762) INFO: 40epoch:train:4101-4200batch: iter_time=8.220e-05, forward_time=0.290, loss_ctc=47.666, loss_att=50.917, acc=0.750, loss=49.942, backward_time=0.295, grad_norm=49.555, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.596e-04, train_time=1.265 +[gpub011:0/16] 2024-02-10 06:52:25,327 (trainer:762) INFO: 40epoch:train:4201-4300batch: iter_time=7.866e-05, forward_time=0.363, loss_ctc=52.258, loss_att=46.566, acc=0.753, loss=48.274, backward_time=0.377, grad_norm=46.457, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.596e-04, train_time=1.309 +[gpub011:0/16] 2024-02-10 06:54:42,261 (trainer:762) INFO: 40epoch:train:4301-4400batch: iter_time=7.839e-05, forward_time=0.291, loss_ctc=48.917, loss_att=47.375, acc=0.771, loss=47.838, backward_time=0.296, grad_norm=46.680, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.596e-04, train_time=1.370 +[gpub011:0/16] 2024-02-10 06:56:44,613 (trainer:762) INFO: 40epoch:train:4401-4500batch: iter_time=7.917e-05, forward_time=0.291, loss_ctc=43.347, loss_att=42.116, acc=0.771, loss=42.486, backward_time=0.297, grad_norm=36.707, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.595e-04, train_time=1.223 +[gpub011:0/16] 2024-02-10 06:58:39,581 (trainer:762) INFO: 40epoch:train:4501-4600batch: iter_time=7.760e-05, forward_time=0.312, loss_ctc=43.896, loss_att=39.483, acc=0.773, loss=40.807, backward_time=0.317, grad_norm=39.672, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.595e-04, train_time=1.149 +[gpub011:0/16] 2024-02-10 07:01:02,431 (trainer:762) INFO: 40epoch:train:4601-4700batch: 
iter_time=7.874e-05, forward_time=0.370, loss_ctc=47.282, loss_att=45.675, acc=0.760, loss=46.157, backward_time=0.336, grad_norm=44.750, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.595e-04, train_time=1.428 +[gpub011:0/16] 2024-02-10 07:03:22,335 (trainer:762) INFO: 40epoch:train:4701-4800batch: iter_time=7.807e-05, forward_time=0.290, loss_ctc=51.687, loss_att=48.303, acc=0.741, loss=49.318, backward_time=0.295, grad_norm=57.202, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.595e-04, train_time=1.399 +[gpub011:0/16] 2024-02-10 07:05:19,756 (trainer:762) INFO: 40epoch:train:4801-4900batch: iter_time=8.353e-05, forward_time=0.294, loss_ctc=51.203, loss_att=50.034, acc=0.756, loss=50.385, backward_time=0.300, grad_norm=45.743, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.595e-04, train_time=1.174 +[gpub011:0/16] 2024-02-10 07:07:42,473 (trainer:762) INFO: 40epoch:train:4901-5000batch: iter_time=7.878e-05, forward_time=0.373, loss_ctc=43.226, loss_att=44.113, acc=0.768, loss=43.847, backward_time=0.358, grad_norm=41.774, clip=100.000, loss_scale=8.152e+33, optim_step_time=0.098, optim0_lr0=1.595e-04, train_time=1.427 +[gpub011:0/16] 2024-02-10 07:08:02,516 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub011:0/16] 2024-02-10 07:08:21,892 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 07:08:25,483 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 07:08:25,483 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-10 07:08:25,486 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 07:15:12,903 (trainer:762) INFO: 40epoch:train:5001-5100batch: iter_time=3.274, forward_time=0.289, loss_ctc=44.438, loss_att=38.663, acc=0.785, loss=40.396, backward_time=0.297, grad_norm=40.158, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.595e-04, train_time=4.504 +[gpub011:0/16] 2024-02-10 07:15:42,788 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-10 07:17:25,845 (trainer:762) INFO: 40epoch:train:5101-5200batch: iter_time=8.333e-05, forward_time=0.291, loss_ctc=42.167, loss_att=46.770, acc=0.755, loss=45.389, backward_time=0.297, grad_norm=49.635, clip=100.000, loss_scale=6.556e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.330 +[gpub011:0/16] 2024-02-10 07:19:58,057 (trainer:762) INFO: 40epoch:train:5201-5300batch: iter_time=8.889e-05, forward_time=0.345, loss_ctc=39.111, loss_att=34.800, acc=0.789, loss=36.093, backward_time=0.401, grad_norm=36.245, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.594e-04, train_time=1.522 +[gpub011:0/16] 2024-02-10 07:21:50,657 (trainer:762) INFO: 40epoch:train:5301-5400batch: iter_time=9.097e-05, forward_time=0.292, loss_ctc=52.331, loss_att=53.403, acc=0.745, loss=53.081, backward_time=0.299, grad_norm=52.803, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.126 +[gpub011:0/16] 2024-02-10 07:24:03,477 (trainer:762) INFO: 40epoch:train:5401-5500batch: iter_time=1.025e-04, forward_time=0.334, loss_ctc=47.843, loss_att=48.884, acc=0.749, loss=48.572, backward_time=0.308, grad_norm=46.012, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=1.594e-04, train_time=1.328 +[gpub011:0/16] 2024-02-10 07:26:21,907 (trainer:762) INFO: 40epoch:train:5501-5600batch: iter_time=8.611e-05, forward_time=0.352, loss_ctc=44.946, loss_att=43.766, acc=0.792, loss=44.120, backward_time=0.303, grad_norm=36.836, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.384 +[gpub011:0/16] 2024-02-10 07:28:37,935 (trainer:762) INFO: 40epoch:train:5601-5700batch: iter_time=8.578e-05, forward_time=0.292, loss_ctc=48.825, loss_att=48.630, acc=0.767, loss=48.688, backward_time=0.297, grad_norm=48.954, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.360 +[gpub011:0/16] 2024-02-10 07:30:54,714 
(trainer:762) INFO: 40epoch:train:5701-5800batch: iter_time=9.969e-05, forward_time=0.380, loss_ctc=44.247, loss_att=43.998, acc=0.769, loss=44.073, backward_time=0.317, grad_norm=41.074, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.594e-04, train_time=1.368 +[gpub011:0/16] 2024-02-10 07:33:01,624 (trainer:762) INFO: 40epoch:train:5801-5900batch: iter_time=9.047e-05, forward_time=0.294, loss_ctc=43.565, loss_att=40.313, acc=0.775, loss=41.288, backward_time=0.296, grad_norm=39.694, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.269 +[gpub011:0/16] 2024-02-10 07:35:16,660 (trainer:762) INFO: 40epoch:train:5901-6000batch: iter_time=8.382e-05, forward_time=0.292, loss_ctc=52.916, loss_att=52.517, acc=0.748, loss=52.637, backward_time=0.297, grad_norm=60.667, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.593e-04, train_time=1.350 +[gpub011:0/16] 2024-02-10 07:37:25,815 (trainer:762) INFO: 40epoch:train:6001-6100batch: iter_time=8.725e-05, forward_time=0.366, loss_ctc=50.091, loss_att=48.218, acc=0.762, loss=48.780, backward_time=0.353, grad_norm=47.402, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.593e-04, train_time=1.291 +[gpub011:0/16] 2024-02-10 07:39:56,541 (trainer:762) INFO: 40epoch:train:6101-6200batch: iter_time=8.685e-05, forward_time=0.295, loss_ctc=47.942, loss_att=46.514, acc=0.766, loss=46.943, backward_time=0.297, grad_norm=41.501, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.593e-04, train_time=1.507 +[gpub011:0/16] 2024-02-10 07:41:31,267 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub011:0/16] 2024-02-10 07:41:50,619 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 07:41:54,234 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 07:41:54,234 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-10 07:41:54,239 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 07:48:01,006 (trainer:762) INFO: 40epoch:train:6201-6300batch: iter_time=3.296, forward_time=0.363, loss_ctc=41.541, loss_att=39.401, acc=0.781, loss=40.043, backward_time=0.322, grad_norm=39.372, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.593e-04, train_time=4.844 +[gpub011:0/16] 2024-02-10 07:50:08,391 (trainer:762) INFO: 40epoch:train:6301-6400batch: iter_time=8.143e-05, forward_time=0.338, loss_ctc=45.121, loss_att=45.900, acc=0.777, loss=45.666, backward_time=0.306, grad_norm=42.630, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.593e-04, train_time=1.274 +[gpub011:0/16] 2024-02-10 07:52:39,426 (trainer:762) INFO: 40epoch:train:6401-6500batch: iter_time=8.121e-05, forward_time=0.312, loss_ctc=40.935, loss_att=41.142, acc=0.762, loss=41.080, backward_time=0.293, grad_norm=47.505, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.593e-04, train_time=1.510 +[gpub011:0/16] 2024-02-10 07:54:44,863 (trainer:762) INFO: 
40epoch:train:6501-6600batch: iter_time=8.285e-05, forward_time=0.354, loss_ctc=42.211, loss_att=39.446, acc=0.770, loss=40.275, backward_time=0.327, grad_norm=41.307, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.593e-04, train_time=1.254 +[gpub011:0/16] 2024-02-10 07:56:51,773 (trainer:762) INFO: 40epoch:train:6601-6700batch: iter_time=8.430e-05, forward_time=0.325, loss_ctc=46.973, loss_att=52.849, acc=0.755, loss=51.086, backward_time=0.323, grad_norm=48.162, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.592e-04, train_time=1.269 +[gpub011:0/16] 2024-02-10 07:59:16,780 (trainer:762) INFO: 40epoch:train:6701-6800batch: iter_time=8.288e-05, forward_time=0.291, loss_ctc=51.479, loss_att=46.107, acc=0.761, loss=47.719, backward_time=0.297, grad_norm=47.217, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.450 +[gpub011:0/16] 2024-02-10 08:01:30,008 (trainer:762) INFO: 40epoch:train:6801-6900batch: iter_time=8.145e-05, forward_time=0.307, loss_ctc=48.202, loss_att=46.925, acc=0.778, loss=47.308, backward_time=0.312, grad_norm=47.509, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.592e-04, train_time=1.332 +[gpub011:0/16] 2024-02-10 08:04:33,479 (trainer:762) INFO: 40epoch:train:6901-7000batch: iter_time=8.448e-05, forward_time=0.401, loss_ctc=43.639, loss_att=43.929, acc=0.775, loss=43.842, backward_time=0.331, grad_norm=37.366, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.592e-04, train_time=1.834 +[gpub011:0/16] 2024-02-10 08:06:25,477 (trainer:762) INFO: 40epoch:train:7001-7100batch: iter_time=7.978e-05, forward_time=0.292, loss_ctc=43.892, loss_att=40.377, acc=0.778, loss=41.431, backward_time=0.298, grad_norm=40.130, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.120 +[gpub011:0/16] 2024-02-10 08:08:29,361 (trainer:762) INFO: 40epoch:train:7101-7200batch: 
iter_time=8.186e-05, forward_time=0.311, loss_ctc=46.853, loss_att=46.104, acc=0.766, loss=46.329, backward_time=0.307, grad_norm=43.592, clip=100.000, loss_scale=8.983e+33, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.238 +[gpub011:0/16] 2024-02-10 08:11:07,081 (trainer:762) INFO: 40epoch:train:7201-7300batch: iter_time=1.261e-04, forward_time=0.320, loss_ctc=50.591, loss_att=49.461, acc=0.751, loss=49.800, backward_time=0.338, grad_norm=55.727, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.592e-04, train_time=1.576 +[gpub011:0/16] 2024-02-10 08:13:06,195 (trainer:762) INFO: 40epoch:train:7301-7400batch: iter_time=8.465e-05, forward_time=0.319, loss_ctc=50.870, loss_att=50.635, acc=0.763, loss=50.706, backward_time=0.326, grad_norm=45.318, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.191 +[gpub011:0/16] 2024-02-10 08:15:31,918 (trainer:762) INFO: 40epoch:train:7401-7500batch: iter_time=8.202e-05, forward_time=0.291, loss_ctc=42.861, loss_att=44.499, acc=0.771, loss=44.007, backward_time=0.295, grad_norm=40.829, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.591e-04, train_time=1.457 +[gpub011:0/16] 2024-02-10 08:15:51,951 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-10 08:16:11,384 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-10 08:16:14,956 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-10 08:16:14,956 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-10 08:16:15,009 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-10 08:23:06,031 (trainer:762) INFO: 40epoch:train:7501-7600batch: iter_time=3.183, forward_time=0.352, loss_ctc=44.399, loss_att=36.902, acc=0.792, loss=39.151, backward_time=0.314, grad_norm=41.651, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.591e-04, train_time=4.541 +[gpub011:0/16] 2024-02-10 08:25:31,223 (trainer:762) INFO: 40epoch:train:7601-7700batch: iter_time=8.613e-05, forward_time=0.342, loss_ctc=42.221, loss_att=44.539, acc=0.764, loss=43.843, backward_time=0.303, grad_norm=48.636, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.591e-04, train_time=1.452 +[gpub011:0/16] 2024-02-10 08:27:49,427 (trainer:762) INFO: 40epoch:train:7701-7800batch: iter_time=8.577e-05, forward_time=0.289, loss_ctc=38.844, loss_att=34.166, acc=0.790, loss=35.569, backward_time=0.295, grad_norm=36.977, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.591e-04, train_time=1.382 +[gpub011:0/16] 2024-02-10 08:28:16,890 (trainer:693) 
WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-10 08:30:28,504 (trainer:762) INFO: 40epoch:train:7801-7900batch: iter_time=9.123e-05, forward_time=0.338, loss_ctc=52.514, loss_att=53.222, acc=0.747, loss=53.009, backward_time=0.345, grad_norm=51.442, clip=100.000, loss_scale=6.241e+33, optim_step_time=0.103, optim0_lr0=1.591e-04, train_time=1.591 +[gpub011:0/16] 2024-02-10 08:32:56,829 (trainer:762) INFO: 40epoch:train:7901-8000batch: iter_time=8.773e-05, forward_time=0.333, loss_ctc=47.856, loss_att=49.608, acc=0.747, loss=49.082, backward_time=0.302, grad_norm=47.291, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.591e-04, train_time=1.483 +[gpub011:0/16] 2024-02-10 08:35:20,763 (trainer:762) INFO: 40epoch:train:8001-8100batch: iter_time=8.871e-05, forward_time=0.292, loss_ctc=45.018, loss_att=43.464, acc=0.792, loss=43.930, backward_time=0.298, grad_norm=37.813, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.591e-04, train_time=1.439 +[gpub011:0/16] 2024-02-10 08:37:55,624 (trainer:762) INFO: 40epoch:train:8101-8200batch: iter_time=8.806e-05, forward_time=0.350, loss_ctc=48.936, loss_att=49.011, acc=0.765, loss=48.988, backward_time=0.323, grad_norm=42.997, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.590e-04, train_time=1.548 +[gpub011:0/16] 2024-02-10 08:40:27,311 (trainer:762) INFO: 40epoch:train:8201-8300batch: iter_time=9.276e-05, forward_time=0.290, loss_ctc=43.803, loss_att=43.285, acc=0.772, loss=43.440, backward_time=0.296, grad_norm=39.630, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.590e-04, train_time=1.517 +[gpub011:0/16] 2024-02-10 08:43:14,386 (trainer:762) INFO: 40epoch:train:8301-8400batch: iter_time=9.201e-05, forward_time=0.324, loss_ctc=43.245, loss_att=39.430, acc=0.777, loss=40.575, backward_time=0.314, grad_norm=39.310, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.590e-04, 
train_time=1.668 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2955233.0 ON gpub011 CANCELLED AT 2024-02-10T08:43:24 DUE TO TIME LIMIT *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.10.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.10.log new file mode 100644 index 0000000000000000000000000000000000000000..c00b9cf89c64d5d7dd6ad13678ea1fa55d628468 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.10.log @@ -0,0 +1,2592 @@ +# Running on gpub058.delta.ncsa.illinois.edu +# Started at Fri Feb 2 20:29:30 CST 2024 +# SLURMD_NODENAME=gpub058 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2932506 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1707100152 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2932506 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[058,061-062,071]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1706927352 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[058,061-062,071]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=2024661 +# 
SLURM_TOPOLOGY_ADDR=ss00.ss11.gpub058 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_8c2850e3-53c6-4718-bc5e-c97ca6838673 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_8c2850e3-53c6-4718-bc5e-c97ca6838673 +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_8c2850e3-53c6-4718-bc5e-c97ca6838673 +[gpub058:0/16] 2024-02-02 20:31:09,702 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub058:0/16] 2024-02-02 20:31:19,708 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=8, timeout=0:30:00) +[gpub058:0/16] 2024-02-02 20:31:29,714 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=8, timeout=0:30:00) +[gpub058:0/16] 2024-02-02 20:31:39,726 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=8, timeout=0:30:00) +[gpub058:0/16] 2024-02-02 20:31:49,729 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize 
process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=8, timeout=0:30:00) +[gpub058:0/16] 2024-02-02 20:31:59,736 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=8, timeout=0:30:00) +[gpub058:0/16] 2024-02-02 20:32:09,737 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=8, timeout=0:30:00) +[gpub058:0/16] 2024-02-02 20:32:19,759 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=8, timeout=0:30:00) +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel 
data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_8c2850e3-53c6-4718-bc5e-c97ca6838673 +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_8c2850e3-53c6-4718-bc5e-c97ca6838673 +[gpub058:0/16] 2024-02-02 20:32:30,011 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=16, worker_count=10, timeout=0:30:00) +[gpub058:0/16] 2024-02-02 20:32:31,239 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. 
+[gpub058:0/16] 2024-02-02 20:32:31,326 (s2t:464) INFO: Vocabulary size: 50002 +[gpub058:0/16] 2024-02-02 20:32:37,473 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub058:0/16] 2024-02-02 20:32:37,479 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), 
eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, 
elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + 
(conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): 
Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, 
inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + 
(norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, 
out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): 
PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + 
(linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub058:0/16] 2024-02-02 20:32:37,480 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub058:0/16] 2024-02-02 20:32:37,480 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub058:0/16] 2024-02-02 20:32:37,492 (abs_task:1245) INFO: Saving the configuration in 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub058:0/16] 2024-02-02 20:32:43,001 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-02 20:32:44,177 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-02 20:32:44,177 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub058:0/16] 2024-02-02 20:32:44,178 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-02 20:32:59,169 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub058:2024730:2024730 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2024730:2024730 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:2024730:2024730 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub058:0/16] 2024-02-02 20:33:04,760 (trainer:284) INFO: 22/45epoch started +[gpub058:0/16] 2024-02-02 20:33:04,804 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub058:0/16] 2024-02-02 20:33:22,661 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-02 20:33:26,008 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-02 20:33:26,008 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub058:0/16] 2024-02-02 20:33:26,011 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub062:1470456:1470456 [0] NCCL INFO cudaDriverVersion 12020 +gpub062:1470456:1470456 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1470456:1470456 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:1470456:1470508 [0] NCCL INFO NET/IB : No device found. 
+gpub062:1470456:1470508 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1470456:1470508 [0] NCCL INFO Using network Socket +gpub062:1470456:1470508 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub062:1470456:1470508 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub062:1470456:1470508 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub062:1470456:1470508 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub062:1470456:1470508 [0] NCCL INFO Connected all rings +gpub062:1470456:1470508 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/Socket/1 +gpub062:1470456:1470508 [0] NCCL INFO Connected all trees +gpub062:1470456:1470508 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1470456:1470508 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1470456:1470508 [0] NCCL INFO comm 0x14bf05c0 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub062:1470458:1470458 [2] NCCL INFO cudaDriverVersion 12020 +gpub062:1470458:1470458 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1470458:1470458 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpub062:1470458:1470509 [2] NCCL INFO NET/IB : No device found. +gpub062:1470458:1470509 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1470458:1470509 [2] NCCL INFO Using network Socket +gpub062:1470458:1470509 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub062:1470458:1470509 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub062:1470458:1470509 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub062:1470458:1470509 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub062:1470458:1470509 [2] NCCL INFO Connected all rings +gpub062:1470458:1470509 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub062:1470458:1470509 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub062:1470458:1470509 [2] NCCL INFO Connected all trees +gpub062:1470458:1470509 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1470458:1470509 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1470458:1470509 [2] NCCL INFO comm 0xb6031f0 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub062:1470457:1470457 [1] NCCL INFO cudaDriverVersion 12020 +gpub062:1470457:1470457 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1470457:1470457 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:1470457:1470510 [1] NCCL INFO NET/IB : No device found. 
+gpub062:1470457:1470510 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1470457:1470510 [1] NCCL INFO Using network Socket +gpub062:1470457:1470510 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub062:1470457:1470510 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub062:1470457:1470510 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub062:1470457:1470510 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub062:1470457:1470510 [1] NCCL INFO Connected all rings +gpub062:1470457:1470510 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/Socket/1 +gpub062:1470457:1470510 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/Socket/1 +gpub062:1470457:1470510 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub062:1470457:1470510 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub062:1470457:1470510 [1] NCCL INFO Connected all trees +gpub062:1470457:1470510 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1470457:1470510 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1470457:1470510 [1] NCCL INFO comm 0xd3ed1d0 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub071:440844:440844 [3] NCCL INFO cudaDriverVersion 12020 +gpub071:440844:440844 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.171<0> +gpub071:440844:440844 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub071:440844:440892 [3] NCCL INFO NET/IB : No device found. 
+gpub071:440844:440892 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.171<0> [1]hsn0:141.142.145.171<0> [2]eth0:fe80::d48a:52dd:fe61:8ea0%eth0<0> +gpub071:440844:440892 [3] NCCL INFO Using network Socket +gpub071:440844:440892 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub071:440844:440892 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub071:440844:440892 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub071:440844:440892 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub071:440844:440892 [3] NCCL INFO Connected all rings +gpub071:440844:440892 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub071:440844:440892 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub071:440844:440892 [3] NCCL INFO Connected all trees +gpub071:440844:440892 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub071:440844:440892 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub071:440844:440892 [3] NCCL INFO comm 0xe33bf10 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub062:1470459:1470459 [3] NCCL INFO cudaDriverVersion 12020 +gpub062:1470459:1470459 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1470459:1470459 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:1470459:1470511 [3] NCCL INFO NET/IB : No device found. 
+gpub062:1470459:1470511 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1470459:1470511 [3] NCCL INFO Using network Socket +gpub062:1470459:1470511 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub062:1470459:1470511 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub062:1470459:1470511 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub062:1470459:1470511 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub062:1470459:1470511 [3] NCCL INFO Connected all rings +gpub062:1470459:1470511 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub062:1470459:1470511 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub062:1470459:1470511 [3] NCCL INFO Connected all trees +gpub062:1470459:1470511 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1470459:1470511 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1470459:1470511 [3] NCCL INFO comm 0x16654700 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub061:2429081:2429081 [0] NCCL INFO cudaDriverVersion 12020 +gpub061:2429081:2429081 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2429081:2429081 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:2429081:2429140 [0] NCCL INFO NET/IB : No device found. 
+gpub061:2429081:2429140 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2429081:2429140 [0] NCCL INFO Using network Socket +gpub061:2429081:2429140 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub061:2429081:2429140 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub061:2429081:2429140 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub061:2429081:2429140 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub061:2429081:2429140 [0] NCCL INFO Connected all rings +gpub061:2429081:2429140 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/Socket/1 +gpub061:2429081:2429140 [0] NCCL INFO Connected all trees +gpub061:2429081:2429140 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2429081:2429140 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2429081:2429140 [0] NCCL INFO comm 0x1af5f510 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub061:2429082:2429082 [1] NCCL INFO cudaDriverVersion 12020 +gpub061:2429082:2429082 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2429082:2429082 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpub061:2429082:2429142 [1] NCCL INFO NET/IB : No device found. +gpub061:2429082:2429142 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2429082:2429142 [1] NCCL INFO Using network Socket +gpub061:2429082:2429142 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub061:2429082:2429142 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub061:2429082:2429142 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub061:2429082:2429142 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub061:2429082:2429142 [1] NCCL INFO Connected all rings +gpub061:2429082:2429142 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/Socket/1 +gpub061:2429082:2429142 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/Socket/1 +gpub061:2429082:2429142 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub061:2429082:2429142 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub061:2429082:2429142 [1] NCCL INFO Connected all trees +gpub061:2429082:2429142 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2429082:2429142 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2429082:2429142 [1] NCCL INFO comm 0xe2568f0 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub061:2429083:2429083 [2] NCCL INFO cudaDriverVersion 12020 +gpub061:2429083:2429083 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2429083:2429083 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:2429083:2429141 [2] NCCL INFO NET/IB : No device found. 
+gpub061:2429083:2429141 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2429083:2429141 [2] NCCL INFO Using network Socket +gpub061:2429083:2429141 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub061:2429083:2429141 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub061:2429083:2429141 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub061:2429083:2429141 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub061:2429083:2429141 [2] NCCL INFO Connected all rings +gpub061:2429083:2429141 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub061:2429083:2429141 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub061:2429083:2429141 [2] NCCL INFO Connected all trees +gpub061:2429083:2429141 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2429083:2429141 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2429083:2429141 [2] NCCL INFO comm 0x111d07c0 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub071:440843:440843 [2] NCCL INFO cudaDriverVersion 12020 +gpub071:440843:440843 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.171<0> +gpub071:440843:440843 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub071:440843:440889 [2] NCCL INFO NET/IB : No device found. 
+gpub071:440843:440889 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.171<0> [1]hsn0:141.142.145.171<0> [2]eth0:fe80::d48a:52dd:fe61:8ea0%eth0<0> +gpub071:440843:440889 [2] NCCL INFO Using network Socket +gpub071:440843:440889 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub071:440843:440889 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub071:440843:440889 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub071:440843:440889 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub071:440843:440889 [2] NCCL INFO Connected all rings +gpub071:440843:440889 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub058:2024731:2024731 [1] NCCL INFO cudaDriverVersion 12020 +gpub058:2024731:2024731 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2024731:2024731 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:2024731:2024796 [1] NCCL INFO NET/IB : No device found. 
+gpub058:2024731:2024796 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2024731:2024796 [1] NCCL INFO Using network Socket +gpub058:2024731:2024796 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub058:2024731:2024796 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub058:2024731:2024796 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub058:2024731:2024796 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub058:2024731:2024796 [1] NCCL INFO Connected all rings +gpub058:2024731:2024796 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub071:440843:440889 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub071:440843:440889 [2] NCCL INFO Connected all trees +gpub071:440843:440889 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub071:440843:440889 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub071:440843:440889 [2] NCCL INFO comm 0x15c0b660 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:2024731:2024796 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub058:2024731:2024796 [1] NCCL INFO Connected all trees +gpub058:2024731:2024796 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2024731:2024796 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2024731:2024796 [1] NCCL INFO comm 0xcc917a0 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub071:440841:440841 [0] NCCL INFO cudaDriverVersion 12020 +gpub071:440841:440841 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.171<0> +gpub071:440841:440841 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub071:440841:440890 [0] NCCL INFO NET/IB : No device found. 
+gpub071:440841:440890 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.171<0> [1]hsn0:141.142.145.171<0> [2]eth0:fe80::d48a:52dd:fe61:8ea0%eth0<0> +gpub071:440841:440890 [0] NCCL INFO Using network Socket +gpub071:440841:440890 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub071:440841:440890 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub071:440841:440890 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub071:440841:440890 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub071:440841:440890 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub071:440841:440890 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub071:440841:440890 [0] NCCL INFO Connected all rings +gpub071:440841:440890 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub071:440841:440890 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub071:440841:440890 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/Socket/1 +gpub071:440841:440890 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/Socket/1 +gpub071:440841:440890 [0] NCCL INFO Connected all trees +gpub071:440841:440890 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub071:440841:440890 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub071:440841:440890 [0] NCCL INFO comm 0x14fb51f0 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub071:440842:440842 [1] NCCL INFO cudaDriverVersion 12020 +gpub071:440842:440842 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.171<0> +gpub071:440842:440842 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub071:440842:440891 [1] NCCL INFO NET/IB : No device found. 
+gpub071:440842:440891 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.171<0> [1]hsn0:141.142.145.171<0> [2]eth0:fe80::d48a:52dd:fe61:8ea0%eth0<0> +gpub071:440842:440891 [1] NCCL INFO Using network Socket +gpub071:440842:440891 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub071:440842:440891 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub071:440842:440891 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub071:440842:440891 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub071:440842:440891 [1] NCCL INFO Connected all rings +gpub071:440842:440891 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub071:440842:440891 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub071:440842:440891 [1] NCCL INFO Connected all trees +gpub071:440842:440891 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub071:440842:440891 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub071:440842:440891 [1] NCCL INFO comm 0xf72c140 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub061:2429084:2429084 [3] NCCL INFO cudaDriverVersion 12020 +gpub061:2429084:2429084 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2429084:2429084 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:2429084:2429143 [3] NCCL INFO NET/IB : No device found. 
+gpub061:2429084:2429143 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2429084:2429143 [3] NCCL INFO Using network Socket +gpub061:2429084:2429143 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub061:2429084:2429143 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub061:2429084:2429143 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub061:2429084:2429143 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub061:2429084:2429143 [3] NCCL INFO Connected all rings +gpub061:2429084:2429143 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub061:2429084:2429143 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub061:2429084:2429143 [3] NCCL INFO Connected all trees +gpub061:2429084:2429143 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2429084:2429143 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2429084:2429143 [3] NCCL INFO comm 0x10f63180 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:2024730:2024793 [0] NCCL INFO NET/IB : No device found. 
+gpub058:2024730:2024793 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2024730:2024793 [0] NCCL INFO Using network Socket +gpub058:2024730:2024793 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub058:2024730:2024793 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub058:2024730:2024793 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub058:2024730:2024793 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub058:2024730:2024793 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2024730:2024793 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2024730:2024793 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub058:2024730:2024793 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub058:2024730:2024793 [0] NCCL INFO Connected all rings +gpub058:2024730:2024793 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/Socket/1 +gpub058:2024730:2024793 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2024730:2024793 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/Socket/1 +gpub058:2024730:2024793 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2024730:2024793 [0] NCCL INFO Connected all trees +gpub058:2024730:2024793 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2024730:2024793 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2024730:2024793 [0] NCCL INFO comm 0x7474f0c0 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub058:2024732:2024732 [2] NCCL INFO cudaDriverVersion 12020 +gpub058:2024732:2024732 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2024732:2024732 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpub058:2024732:2024794 [2] NCCL INFO NET/IB : No device found. +gpub058:2024732:2024794 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2024732:2024794 [2] NCCL INFO Using network Socket +gpub058:2024732:2024794 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub058:2024732:2024794 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub058:2024732:2024794 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub058:2024732:2024794 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub058:2024732:2024794 [2] NCCL INFO Connected all rings +gpub058:2024732:2024794 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub058:2024732:2024794 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub058:2024732:2024794 [2] NCCL INFO Connected all trees +gpub058:2024732:2024794 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2024732:2024794 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2024732:2024794 [2] NCCL INFO comm 0xedc40d0 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:2024733:2024733 [3] NCCL INFO cudaDriverVersion 12020 +gpub058:2024733:2024733 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2024733:2024733 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:2024733:2024795 [3] NCCL INFO NET/IB : No device found. 
+gpub058:2024733:2024795 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2024733:2024795 [3] NCCL INFO Using network Socket +gpub058:2024733:2024795 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub058:2024733:2024795 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub058:2024733:2024795 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub058:2024733:2024795 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub058:2024733:2024795 [3] NCCL INFO Connected all rings +gpub058:2024733:2024795 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub058:2024733:2024795 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub058:2024733:2024795 [3] NCCL INFO Connected all trees +gpub058:2024733:2024795 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2024733:2024795 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2024733:2024795 [3] NCCL INFO comm 0x14398bf0 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +[gpub058:0/16] 2024-02-02 20:42:59,792 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub058:0/16] 2024-02-02 20:46:10,549 (trainer:737) INFO: 22epoch:train:1-100batch: iter_time=3.072, forward_time=0.376, loss_ctc=54.605, loss_att=59.349, acc=0.723, loss=57.926, backward_time=0.411, grad_norm=39.680, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.182e-04, train_time=7.857 +[gpub058:0/16] 2024-02-02 20:50:45,607 (trainer:737) INFO: 22epoch:train:101-200batch: iter_time=9.428e-05, forward_time=0.350, loss_ctc=50.542, loss_att=51.612, acc=0.719, loss=51.291, backward_time=0.425, grad_norm=36.846, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.182e-04, train_time=2.750 +[gpub058:0/16] 2024-02-02 20:54:06,235 (trainer:737) INFO: 22epoch:train:201-300batch: iter_time=3.186e-04, forward_time=0.313, loss_ctc=54.663, loss_att=46.797, acc=0.760, loss=49.157, backward_time=0.407, grad_norm=32.838, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.181e-04, train_time=2.007 +[gpub058:0/16] 2024-02-02 20:57:41,945 (trainer:737) INFO: 22epoch:train:301-400batch: iter_time=8.946e-05, forward_time=0.336, loss_ctc=52.116, loss_att=54.303, acc=0.732, loss=53.647, backward_time=0.420, grad_norm=38.768, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.181e-04, train_time=2.157 +[gpub058:0/16] 2024-02-02 21:00:43,152 (trainer:737) INFO: 22epoch:train:401-500batch: iter_time=1.055e-04, forward_time=0.327, loss_ctc=58.133, loss_att=51.993, acc=0.749, loss=53.835, backward_time=0.421, grad_norm=39.318, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.181e-04, train_time=1.811 +[gpub058:0/16] 2024-02-02 21:04:20,921 (trainer:737) INFO: 22epoch:train:501-600batch: iter_time=9.733e-05, forward_time=0.344, loss_ctc=52.104, loss_att=50.746, acc=0.735, loss=51.153, backward_time=0.414, grad_norm=35.890, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.180e-04, train_time=2.179 +[gpub058:0/16] 2024-02-02 21:07:35,569 (trainer:737) INFO: 
22epoch:train:601-700batch: iter_time=9.290e-05, forward_time=0.287, loss_ctc=55.156, loss_att=46.061, acc=0.741, loss=48.790, backward_time=0.397, grad_norm=42.747, clip=100.000, loss_scale=7.062e+33, optim_step_time=0.091, optim0_lr0=2.180e-04, train_time=1.946 +[gpub058:0/16] 2024-02-02 21:10:53,191 (trainer:737) INFO: 22epoch:train:701-800batch: iter_time=9.677e-05, forward_time=0.395, loss_ctc=56.217, loss_att=50.572, acc=0.749, loss=52.265, backward_time=0.420, grad_norm=39.491, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.180e-04, train_time=1.975 +[gpub058:0/16] 2024-02-02 21:14:01,106 (trainer:737) INFO: 22epoch:train:801-900batch: iter_time=9.549e-05, forward_time=0.289, loss_ctc=52.629, loss_att=55.688, acc=0.735, loss=54.770, backward_time=0.400, grad_norm=38.894, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.179e-04, train_time=1.880 +[gpub058:0/16] 2024-02-02 21:16:59,310 (trainer:737) INFO: 22epoch:train:901-1000batch: iter_time=9.657e-05, forward_time=0.369, loss_ctc=53.588, loss_att=51.105, acc=0.723, loss=51.850, backward_time=0.413, grad_norm=40.169, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.179e-04, train_time=1.782 +[gpub058:0/16] 2024-02-02 21:19:59,397 (trainer:737) INFO: 22epoch:train:1001-1100batch: iter_time=8.136e-05, forward_time=0.301, loss_ctc=60.142, loss_att=58.070, acc=0.733, loss=58.692, backward_time=0.417, grad_norm=40.980, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.179e-04, train_time=1.798 +[gpub058:0/16] 2024-02-02 21:23:15,247 (trainer:737) INFO: 22epoch:train:1101-1200batch: iter_time=8.064e-05, forward_time=0.287, loss_ctc=51.604, loss_att=51.098, acc=0.734, loss=51.250, backward_time=0.399, grad_norm=36.998, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.178e-04, train_time=1.961 +[gpub058:0/16] 2024-02-02 21:25:05,164 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub058:0/16] 2024-02-02 21:25:23,799 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-02 21:25:27,255 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-02 21:25:27,255 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-02 21:25:27,258 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-02 21:34:16,523 (trainer:737) INFO: 22epoch:train:1201-1300batch: iter_time=3.169, forward_time=0.337, loss_ctc=52.096, loss_att=57.080, acc=0.728, loss=55.585, backward_time=0.418, grad_norm=38.095, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.178e-04, train_time=6.613 +[gpub058:0/16] 2024-02-02 21:37:01,583 (trainer:737) INFO: 22epoch:train:1301-1400batch: iter_time=8.324e-05, forward_time=0.333, loss_ctc=53.304, loss_att=57.492, acc=0.704, loss=56.235, backward_time=0.423, grad_norm=39.679, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.178e-04, train_time=1.650 +[gpub058:0/16] 2024-02-02 21:39:30,357 (trainer:737) INFO: 22epoch:train:1401-1500batch: iter_time=8.313e-05, forward_time=0.287, loss_ctc=42.968, loss_att=37.847, acc=0.752, loss=39.383, backward_time=0.398, grad_norm=30.403, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.177e-04, train_time=1.487 +[gpub058:0/16] 2024-02-02 21:40:45,319 (trainer:668) 
WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-02 21:42:07,759 (trainer:737) INFO: 22epoch:train:1501-1600batch: iter_time=8.718e-05, forward_time=0.295, loss_ctc=55.182, loss_att=52.618, acc=0.747, loss=53.387, backward_time=0.408, grad_norm=35.755, clip=100.000, loss_scale=7.395e+33, optim_step_time=0.091, optim0_lr0=2.177e-04, train_time=1.574 +[gpub058:0/16] 2024-02-02 21:45:02,452 (trainer:737) INFO: 22epoch:train:1601-1700batch: iter_time=9.128e-05, forward_time=0.375, loss_ctc=56.111, loss_att=53.970, acc=0.734, loss=54.612, backward_time=0.444, grad_norm=35.374, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.177e-04, train_time=1.747 +[gpub058:0/16] 2024-02-02 21:47:45,115 (trainer:737) INFO: 22epoch:train:1701-1800batch: iter_time=8.693e-05, forward_time=0.291, loss_ctc=54.139, loss_att=48.451, acc=0.743, loss=50.158, backward_time=0.403, grad_norm=35.890, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.176e-04, train_time=1.626 +[gpub058:0/16] 2024-02-02 21:50:08,523 (trainer:737) INFO: 22epoch:train:1801-1900batch: iter_time=8.335e-05, forward_time=0.289, loss_ctc=47.506, loss_att=45.046, acc=0.734, loss=45.784, backward_time=0.402, grad_norm=35.443, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.176e-04, train_time=1.433 +[gpub058:0/16] 2024-02-02 21:52:42,711 (trainer:737) INFO: 22epoch:train:1901-2000batch: iter_time=8.193e-05, forward_time=0.305, loss_ctc=54.524, loss_att=45.149, acc=0.732, loss=47.961, backward_time=0.413, grad_norm=43.650, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.176e-04, train_time=1.542 +[gpub058:0/16] 2024-02-02 21:55:35,849 (trainer:737) INFO: 22epoch:train:2001-2100batch: iter_time=8.690e-05, forward_time=0.342, loss_ctc=59.794, loss_att=59.746, acc=0.733, loss=59.760, backward_time=0.419, grad_norm=39.380, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.175e-04, 
train_time=1.731 +[gpub058:0/16] 2024-02-02 21:58:03,989 (trainer:737) INFO: 22epoch:train:2101-2200batch: iter_time=8.242e-05, forward_time=0.289, loss_ctc=50.537, loss_att=54.357, acc=0.726, loss=53.211, backward_time=0.402, grad_norm=36.613, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.175e-04, train_time=1.481 +[gpub058:0/16] 2024-02-02 22:00:29,399 (trainer:737) INFO: 22epoch:train:2201-2300batch: iter_time=8.149e-05, forward_time=0.292, loss_ctc=48.640, loss_att=47.881, acc=0.730, loss=48.109, backward_time=0.403, grad_norm=34.322, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.175e-04, train_time=1.453 +[gpub058:0/16] 2024-02-02 22:03:18,954 (trainer:737) INFO: 22epoch:train:2301-2400batch: iter_time=8.335e-05, forward_time=0.374, loss_ctc=55.953, loss_att=56.322, acc=0.731, loss=56.211, backward_time=0.439, grad_norm=38.833, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.174e-04, train_time=1.696 +[gpub058:0/16] 2024-02-02 22:06:00,158 (trainer:737) INFO: 22epoch:train:2401-2500batch: iter_time=7.995e-05, forward_time=0.288, loss_ctc=49.520, loss_att=46.109, acc=0.731, loss=47.132, backward_time=0.399, grad_norm=33.979, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.174e-04, train_time=1.612 +[gpub058:0/16] 2024-02-02 22:06:20,187 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub058:0/16] 2024-02-02 22:06:38,708 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-02 22:06:42,670 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-02 22:06:42,670 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub058:0/16] 2024-02-02 22:06:42,674 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-02 22:13:32,522 (trainer:737) INFO: 22epoch:train:2501-2600batch: iter_time=2.997, forward_time=0.380, loss_ctc=53.323, loss_att=59.525, acc=0.728, loss=57.664, backward_time=0.426, grad_norm=37.643, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.174e-04, train_time=4.523 +[gpub058:0/16] 2024-02-02 22:15:52,316 (trainer:737) INFO: 22epoch:train:2601-2700batch: iter_time=8.583e-05, forward_time=0.306, loss_ctc=48.222, loss_att=50.977, acc=0.727, loss=50.151, backward_time=0.406, grad_norm=34.965, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.173e-04, train_time=1.398 +[gpub058:0/16] 2024-02-02 22:18:40,112 (trainer:737) INFO: 22epoch:train:2701-2800batch: iter_time=7.923e-05, forward_time=0.310, loss_ctc=52.831, loss_att=45.762, acc=0.766, loss=47.883, backward_time=0.419, grad_norm=33.753, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.173e-04, train_time=1.678 +[gpub058:0/16] 2024-02-02 22:21:05,510 (trainer:737) INFO: 
22epoch:train:2801-2900batch: iter_time=8.115e-05, forward_time=0.342, loss_ctc=50.906, loss_att=53.637, acc=0.736, loss=52.818, backward_time=0.425, grad_norm=36.687, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.173e-04, train_time=1.453 +[gpub058:0/16] 2024-02-02 22:23:30,182 (trainer:737) INFO: 22epoch:train:2901-3000batch: iter_time=8.372e-05, forward_time=0.313, loss_ctc=56.109, loss_att=51.681, acc=0.751, loss=53.009, backward_time=0.420, grad_norm=37.704, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.172e-04, train_time=1.447 +[gpub058:0/16] 2024-02-02 22:26:04,977 (trainer:737) INFO: 22epoch:train:3001-3100batch: iter_time=7.420e-05, forward_time=0.355, loss_ctc=49.939, loss_att=49.832, acc=0.739, loss=49.864, backward_time=0.415, grad_norm=34.651, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.172e-04, train_time=1.548 +[gpub058:0/16] 2024-02-02 22:28:38,778 (trainer:737) INFO: 22epoch:train:3101-3200batch: iter_time=5.763e-04, forward_time=0.308, loss_ctc=52.385, loss_att=45.069, acc=0.744, loss=47.264, backward_time=0.408, grad_norm=40.315, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.172e-04, train_time=1.538 +[gpub058:0/16] 2024-02-02 22:31:07,364 (trainer:737) INFO: 22epoch:train:3201-3300batch: iter_time=7.773e-05, forward_time=0.347, loss_ctc=54.468, loss_att=49.829, acc=0.753, loss=51.220, backward_time=0.440, grad_norm=37.893, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.171e-04, train_time=1.486 +[gpub058:0/16] 2024-02-02 22:33:43,279 (trainer:737) INFO: 22epoch:train:3301-3400batch: iter_time=1.614e-04, forward_time=0.321, loss_ctc=50.544, loss_att=54.836, acc=0.740, loss=53.548, backward_time=0.410, grad_norm=35.949, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.171e-04, train_time=1.558 +[gpub058:0/16] 2024-02-02 22:36:08,226 (trainer:737) INFO: 22epoch:train:3401-3500batch: 
iter_time=8.242e-05, forward_time=0.333, loss_ctc=51.578, loss_att=50.390, acc=0.729, loss=50.747, backward_time=0.434, grad_norm=38.147, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.170e-04, train_time=1.450 +[gpub058:0/16] 2024-02-02 22:38:47,308 (trainer:737) INFO: 22epoch:train:3501-3600batch: iter_time=0.002, forward_time=0.329, loss_ctc=57.958, loss_att=57.261, acc=0.738, loss=57.470, backward_time=0.420, grad_norm=36.663, clip=100.000, loss_scale=8.152e+33, optim_step_time=0.097, optim0_lr0=2.170e-04, train_time=1.590 +[gpub058:0/16] 2024-02-02 22:41:08,150 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-02 22:41:25,142 (trainer:737) INFO: 22epoch:train:3601-3700batch: iter_time=4.220e-04, forward_time=0.348, loss_ctc=49.342, loss_att=49.126, acc=0.741, loss=49.191, backward_time=0.417, grad_norm=34.289, clip=100.000, loss_scale=9.755e+33, optim_step_time=0.097, optim0_lr0=2.170e-04, train_time=1.579 +[gpub058:0/16] 2024-02-02 22:42:56,635 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub058:0/16] 2024-02-02 22:43:15,347 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-02 22:43:18,975 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-02 22:43:18,975 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub058:0/16] 2024-02-02 22:43:18,978 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-02 22:48:55,576 (trainer:737) INFO: 22epoch:train:3701-3800batch: iter_time=3.057, forward_time=0.350, loss_ctc=51.109, loss_att=55.167, acc=0.736, loss=53.949, backward_time=0.447, grad_norm=36.715, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.169e-04, train_time=4.504 +[gpub058:0/16] 2024-02-02 22:51:29,376 (trainer:737) INFO: 22epoch:train:3801-3900batch: iter_time=7.545e-05, forward_time=0.315, loss_ctc=52.851, loss_att=56.611, acc=0.708, loss=55.483, backward_time=0.402, grad_norm=38.396, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.169e-04, train_time=1.538 +[gpub058:0/16] 2024-02-02 22:54:05,378 (trainer:737) INFO: 22epoch:train:3901-4000batch: iter_time=7.910e-05, forward_time=0.320, loss_ctc=42.545, loss_att=37.765, acc=0.754, loss=39.199, backward_time=0.405, grad_norm=29.364, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.169e-04, train_time=1.560 +[gpub058:0/16] 2024-02-02 22:56:36,674 (trainer:737) INFO: 
22epoch:train:4001-4100batch: iter_time=8.114e-05, forward_time=0.304, loss_ctc=54.096, loss_att=51.773, acc=0.751, loss=52.470, backward_time=0.416, grad_norm=35.416, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.168e-04, train_time=1.512 +[gpub058:0/16] 2024-02-02 22:58:54,859 (trainer:737) INFO: 22epoch:train:4101-4200batch: iter_time=8.093e-05, forward_time=0.333, loss_ctc=55.593, loss_att=53.843, acc=0.737, loss=54.368, backward_time=0.421, grad_norm=36.693, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.168e-04, train_time=1.382 +[gpub058:0/16] 2024-02-02 23:01:38,322 (trainer:737) INFO: 22epoch:train:4201-4300batch: iter_time=2.448e-04, forward_time=0.325, loss_ctc=53.155, loss_att=47.959, acc=0.747, loss=49.518, backward_time=0.405, grad_norm=35.305, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.168e-04, train_time=1.634 +[gpub058:0/16] 2024-02-02 23:04:03,206 (trainer:737) INFO: 22epoch:train:4301-4400batch: iter_time=8.520e-05, forward_time=0.306, loss_ctc=46.551, loss_att=44.317, acc=0.736, loss=44.987, backward_time=0.421, grad_norm=34.185, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.167e-04, train_time=1.448 +[gpub058:0/16] 2024-02-02 23:06:37,723 (trainer:737) INFO: 22epoch:train:4401-4500batch: iter_time=8.277e-05, forward_time=0.351, loss_ctc=53.771, loss_att=44.644, acc=0.737, loss=47.382, backward_time=0.413, grad_norm=42.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.167e-04, train_time=1.546 +[gpub058:0/16] 2024-02-02 23:08:54,737 (trainer:737) INFO: 22epoch:train:4501-4600batch: iter_time=1.858e-04, forward_time=0.304, loss_ctc=58.689, loss_att=58.310, acc=0.736, loss=58.424, backward_time=0.419, grad_norm=38.764, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.167e-04, train_time=1.370 +[gpub058:0/16] 2024-02-02 23:11:42,279 (trainer:737) INFO: 22epoch:train:4601-4700batch: 
iter_time=8.721e-05, forward_time=0.337, loss_ctc=49.167, loss_att=53.840, acc=0.730, loss=52.438, backward_time=0.428, grad_norm=36.468, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.166e-04, train_time=1.674 +[gpub058:0/16] 2024-02-02 23:14:20,570 (trainer:737) INFO: 22epoch:train:4701-4800batch: iter_time=8.879e-04, forward_time=0.347, loss_ctc=48.156, loss_att=48.217, acc=0.731, loss=48.199, backward_time=0.430, grad_norm=35.372, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.166e-04, train_time=1.584 +[gpub058:0/16] 2024-02-02 23:16:54,500 (trainer:737) INFO: 22epoch:train:4801-4900batch: iter_time=8.658e-05, forward_time=0.350, loss_ctc=54.703, loss_att=54.955, acc=0.734, loss=54.879, backward_time=0.447, grad_norm=42.581, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.166e-04, train_time=1.538 +[gpub058:0/16] 2024-02-02 23:19:28,373 (trainer:737) INFO: 22epoch:train:4901-5000batch: iter_time=8.766e-05, forward_time=0.312, loss_ctc=48.506, loss_att=45.667, acc=0.733, loss=46.519, backward_time=0.410, grad_norm=38.431, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.165e-04, train_time=1.539 +[gpub058:0/16] 2024-02-02 23:19:48,562 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub058:0/16] 2024-02-02 23:20:07,257 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-02 23:20:10,859 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-02 23:20:10,860 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-02 23:20:10,892 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-02 23:27:26,455 (trainer:737) INFO: 22epoch:train:5001-5100batch: iter_time=3.273, forward_time=0.311, loss_ctc=53.167, loss_att=58.353, acc=0.726, loss=56.797, backward_time=0.420, grad_norm=38.131, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.165e-04, train_time=4.781 +[gpub058:0/16] 2024-02-02 23:30:06,871 (trainer:737) INFO: 22epoch:train:5101-5200batch: iter_time=8.560e-05, forward_time=0.332, loss_ctc=47.431, loss_att=49.019, acc=0.723, loss=48.542, backward_time=0.411, grad_norm=41.624, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.165e-04, train_time=1.603 +[gpub058:0/16] 2024-02-02 23:32:33,416 (trainer:737) INFO: 22epoch:train:5201-5300batch: iter_time=8.219e-05, forward_time=0.322, loss_ctc=52.616, loss_att=46.062, acc=0.757, loss=48.028, backward_time=0.418, grad_norm=33.911, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.164e-04, train_time=1.466 +[gpub058:0/16] 2024-02-02 23:35:10,154 (trainer:737) INFO: 
22epoch:train:5301-5400batch: iter_time=8.672e-05, forward_time=0.307, loss_ctc=50.234, loss_att=52.391, acc=0.736, loss=51.744, backward_time=0.402, grad_norm=37.221, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.164e-04, train_time=1.567 +[gpub058:0/16] 2024-02-02 23:37:33,903 (trainer:737) INFO: 22epoch:train:5401-5500batch: iter_time=8.658e-05, forward_time=0.351, loss_ctc=55.457, loss_att=49.079, acc=0.749, loss=50.992, backward_time=0.456, grad_norm=37.923, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.164e-04, train_time=1.437 +[gpub058:0/16] 2024-02-02 23:39:57,298 (trainer:737) INFO: 22epoch:train:5501-5600batch: iter_time=8.016e-05, forward_time=0.309, loss_ctc=49.281, loss_att=49.787, acc=0.726, loss=49.635, backward_time=0.422, grad_norm=34.716, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.163e-04, train_time=1.434 +[gpub058:0/16] 2024-02-02 23:42:32,906 (trainer:737) INFO: 22epoch:train:5601-5700batch: iter_time=8.459e-05, forward_time=0.291, loss_ctc=51.749, loss_att=43.251, acc=0.743, loss=45.800, backward_time=0.399, grad_norm=36.677, clip=100.000, loss_scale=5.815e+33, optim_step_time=0.091, optim0_lr0=2.163e-04, train_time=1.556 +[gpub058:0/16] 2024-02-02 23:44:56,499 (trainer:737) INFO: 22epoch:train:5701-5800batch: iter_time=8.799e-05, forward_time=0.327, loss_ctc=53.557, loss_att=48.942, acc=0.750, loss=50.327, backward_time=0.422, grad_norm=35.259, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.163e-04, train_time=1.436 +[gpub058:0/16] 2024-02-02 23:47:29,688 (trainer:737) INFO: 22epoch:train:5801-5900batch: iter_time=8.139e-05, forward_time=0.317, loss_ctc=49.652, loss_att=54.448, acc=0.737, loss=53.009, backward_time=0.416, grad_norm=37.216, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.162e-04, train_time=1.532 +[gpub058:0/16] 2024-02-02 23:50:06,970 (trainer:737) INFO: 22epoch:train:5901-6000batch: 
iter_time=9.230e-05, forward_time=0.311, loss_ctc=50.648, loss_att=48.999, acc=0.727, loss=49.494, backward_time=0.414, grad_norm=36.041, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.162e-04, train_time=1.573 +[gpub058:0/16] 2024-02-02 23:52:35,113 (trainer:737) INFO: 22epoch:train:6001-6100batch: iter_time=7.936e-05, forward_time=0.308, loss_ctc=56.060, loss_att=56.425, acc=0.737, loss=56.315, backward_time=0.424, grad_norm=36.248, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.162e-04, train_time=1.481 +[gpub058:0/16] 2024-02-02 23:55:05,274 (trainer:737) INFO: 22epoch:train:6101-6200batch: iter_time=8.342e-05, forward_time=0.337, loss_ctc=48.563, loss_att=48.931, acc=0.735, loss=48.821, backward_time=0.415, grad_norm=35.660, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.161e-04, train_time=1.502 +[gpub058:0/16] 2024-02-02 23:56:43,138 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub058:0/16] 2024-02-02 23:57:01,932 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-02 23:57:05,571 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-02 23:57:05,571 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub058:0/16] 2024-02-02 23:57:05,574 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 00:02:51,976 
(trainer:737) INFO: 22epoch:train:6201-6300batch: iter_time=3.106, forward_time=0.303, loss_ctc=50.699, loss_att=54.610, acc=0.727, loss=53.436, backward_time=0.405, grad_norm=37.948, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.161e-04, train_time=4.667 +[gpub058:0/16] 2024-02-03 00:05:11,597 (trainer:737) INFO: 22epoch:train:6301-6400batch: iter_time=7.676e-05, forward_time=0.318, loss_ctc=51.903, loss_att=55.731, acc=0.711, loss=54.582, backward_time=0.424, grad_norm=36.923, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.161e-04, train_time=1.396 +[gpub058:0/16] 2024-02-03 00:07:36,422 (trainer:737) INFO: 22epoch:train:6401-6500batch: iter_time=7.697e-05, forward_time=0.321, loss_ctc=42.050, loss_att=36.935, acc=0.757, loss=38.469, backward_time=0.406, grad_norm=31.031, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.160e-04, train_time=1.448 +[gpub058:0/16] 2024-02-03 00:10:13,880 (trainer:737) INFO: 22epoch:train:6501-6600batch: iter_time=8.625e-05, forward_time=0.311, loss_ctc=54.107, loss_att=51.416, acc=0.752, loss=52.224, backward_time=0.415, grad_norm=36.022, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.160e-04, train_time=1.574 +[gpub058:0/16] 2024-02-03 00:12:40,044 (trainer:737) INFO: 22epoch:train:6601-6700batch: iter_time=8.824e-05, forward_time=0.365, loss_ctc=54.580, loss_att=52.500, acc=0.741, loss=53.124, backward_time=0.438, grad_norm=36.155, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.160e-04, train_time=1.462 +[gpub058:0/16] 2024-02-03 00:15:09,737 (trainer:737) INFO: 22epoch:train:6701-6800batch: iter_time=2.090e-04, forward_time=0.315, loss_ctc=51.993, loss_att=47.123, acc=0.747, loss=48.584, backward_time=0.419, grad_norm=34.131, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.159e-04, train_time=1.497 +[gpub058:0/16] 2024-02-03 00:17:56,548 (trainer:737) INFO: 
22epoch:train:6801-6900batch: iter_time=8.775e-05, forward_time=0.390, loss_ctc=46.408, loss_att=44.353, acc=0.736, loss=44.969, backward_time=0.421, grad_norm=33.629, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.159e-04, train_time=1.668 +[gpub058:0/16] 2024-02-03 00:20:16,108 (trainer:737) INFO: 22epoch:train:6901-7000batch: iter_time=8.490e-05, forward_time=0.297, loss_ctc=52.993, loss_att=43.624, acc=0.742, loss=46.435, backward_time=0.401, grad_norm=41.897, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.159e-04, train_time=1.395 +[gpub058:0/16] 2024-02-03 00:21:04,061 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 00:22:35,644 (trainer:737) INFO: 22epoch:train:7001-7100batch: iter_time=4.279e-04, forward_time=0.319, loss_ctc=58.265, loss_att=58.594, acc=0.735, loss=58.495, backward_time=0.422, grad_norm=38.788, clip=100.000, loss_scale=6.923e+33, optim_step_time=0.095, optim0_lr0=2.158e-04, train_time=1.395 +[gpub058:0/16] 2024-02-03 00:25:20,087 (trainer:737) INFO: 22epoch:train:7101-7200batch: iter_time=8.871e-05, forward_time=0.334, loss_ctc=48.586, loss_att=53.609, acc=0.731, loss=52.102, backward_time=0.424, grad_norm=35.666, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.158e-04, train_time=1.645 +[gpub058:0/16] 2024-02-03 00:27:56,095 (trainer:737) INFO: 22epoch:train:7201-7300batch: iter_time=8.506e-05, forward_time=0.318, loss_ctc=47.090, loss_att=48.316, acc=0.730, loss=47.948, backward_time=0.410, grad_norm=35.579, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.158e-04, train_time=1.560 +[gpub058:0/16] 2024-02-03 00:30:20,457 (trainer:737) INFO: 22epoch:train:7301-7400batch: iter_time=9.981e-05, forward_time=0.338, loss_ctc=53.643, loss_att=54.671, acc=0.738, loss=54.362, backward_time=0.440, grad_norm=35.025, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.157e-04, 
train_time=1.443 +[gpub058:0/16] 2024-02-03 00:32:56,359 (trainer:737) INFO: 22epoch:train:7401-7500batch: iter_time=8.207e-05, forward_time=0.321, loss_ctc=48.220, loss_att=44.945, acc=0.737, loss=45.927, backward_time=0.413, grad_norm=35.056, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.157e-04, train_time=1.559 +[gpub058:0/16] 2024-02-03 00:33:16,545 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub058:0/16] 2024-02-03 00:33:35,549 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 00:33:39,201 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 00:33:39,202 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub058:0/16] 2024-02-03 00:33:39,205 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 00:40:26,154 (trainer:737) INFO: 22epoch:train:7501-7600batch: iter_time=3.116, forward_time=0.347, loss_ctc=52.300, loss_att=57.848, acc=0.728, loss=56.184, backward_time=0.413, grad_norm=38.196, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.157e-04, train_time=4.498 +[gpub058:0/16] 2024-02-03 00:43:03,242 (trainer:737) INFO: 22epoch:train:7601-7700batch: iter_time=8.548e-05, forward_time=0.370, loss_ctc=47.287, loss_att=47.819, acc=0.728, loss=47.659, backward_time=0.446, grad_norm=34.484, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.100, optim0_lr0=2.156e-04, train_time=1.570 +[gpub058:0/16] 2024-02-03 00:45:26,332 (trainer:737) INFO: 22epoch:train:7701-7800batch: iter_time=8.890e-05, forward_time=0.299, loss_ctc=52.565, loss_att=45.986, acc=0.757, loss=47.960, backward_time=0.407, grad_norm=31.601, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.156e-04, train_time=1.432 +[gpub058:0/16] 2024-02-03 00:47:58,322 (trainer:737) INFO: 22epoch:train:7801-7900batch: iter_time=9.277e-05, forward_time=0.290, loss_ctc=49.670, loss_att=51.401, acc=0.739, loss=50.882, backward_time=0.405, grad_norm=35.967, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.156e-04, train_time=1.520 +[gpub058:0/16] 2024-02-03 00:50:29,782 (trainer:737) INFO: 22epoch:train:7901-8000batch: iter_time=2.038e-04, forward_time=0.366, loss_ctc=55.304, loss_att=48.870, acc=0.749, loss=50.800, backward_time=0.456, grad_norm=36.083, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.155e-04, train_time=1.513 +[gpub058:0/16] 2024-02-03 00:52:46,346 (trainer:737) INFO: 22epoch:train:8001-8100batch: iter_time=8.568e-05, forward_time=0.290, loss_ctc=48.580, loss_att=48.277, acc=0.731, loss=48.368, backward_time=0.405, grad_norm=35.934, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.155e-04, train_time=1.366 +[gpub058:0/16] 2024-02-03 00:55:13,564 (trainer:737) INFO: 22epoch:train:8101-8200batch: iter_time=8.795e-05, forward_time=0.287, loss_ctc=52.379, loss_att=43.114, acc=0.743, loss=45.894, backward_time=0.405, grad_norm=40.289, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.155e-04, train_time=1.472 +[gpub058:0/16] 2024-02-03 00:57:59,347 (trainer:737) INFO: 22epoch:train:8201-8300batch: iter_time=8.282e-05, forward_time=0.386, loss_ctc=53.573, loss_att=48.979, acc=0.750, loss=50.357, backward_time=0.438, grad_norm=38.457, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, 
optim0_lr0=2.154e-04, train_time=1.657 +[gpub058:0/16] 2024-02-03 01:00:15,967 (trainer:737) INFO: 22epoch:train:8301-8400batch: iter_time=8.517e-05, forward_time=0.290, loss_ctc=49.159, loss_att=53.407, acc=0.741, loss=52.133, backward_time=0.404, grad_norm=37.424, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.154e-04, train_time=1.367 +[gpub058:0/16] 2024-02-03 01:02:57,826 (trainer:737) INFO: 22epoch:train:8401-8500batch: iter_time=3.523e-04, forward_time=0.402, loss_ctc=49.704, loss_att=47.822, acc=0.731, loss=48.386, backward_time=0.435, grad_norm=36.285, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.154e-04, train_time=1.618 +[gpub058:0/16] 2024-02-03 01:05:30,104 (trainer:737) INFO: 22epoch:train:8501-8600batch: iter_time=8.976e-05, forward_time=0.290, loss_ctc=56.390, loss_att=55.994, acc=0.739, loss=56.113, backward_time=0.404, grad_norm=40.067, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.153e-04, train_time=1.522 +[gpub058:0/16] 2024-02-03 01:07:47,423 (trainer:737) INFO: 22epoch:train:8601-8700batch: iter_time=8.565e-05, forward_time=0.291, loss_ctc=48.017, loss_att=48.385, acc=0.737, loss=48.274, backward_time=0.405, grad_norm=34.471, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.153e-04, train_time=1.374 +[gpub058:0/16] 2024-02-03 01:09:27,114 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub058:0/16] 2024-02-03 01:09:45,948 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 01:09:49,636 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 01:09:49,636 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub058:0/16] 2024-02-03 01:09:49,639 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 01:15:42,539 (trainer:737) INFO: 22epoch:train:8701-8800batch: iter_time=3.206, forward_time=0.419, loss_ctc=50.350, loss_att=55.746, acc=0.732, loss=54.127, backward_time=0.430, grad_norm=35.287, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.153e-04, train_time=4.751 +[gpub058:0/16] 2024-02-03 01:18:20,726 (trainer:737) INFO: 22epoch:train:8801-8900batch: iter_time=2.864e-04, forward_time=0.409, loss_ctc=52.057, loss_att=58.505, acc=0.713, loss=56.571, backward_time=0.430, grad_norm=38.074, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.152e-04, train_time=1.582 +[gpub058:0/16] 2024-02-03 01:21:14,784 (trainer:737) INFO: 22epoch:train:8901-9000batch: iter_time=8.226e-05, forward_time=0.352, loss_ctc=41.931, loss_att=37.602, acc=0.765, loss=38.900, backward_time=0.486, grad_norm=29.814, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=2.152e-04, train_time=1.739 +[gpub058:0/16] 2024-02-03 01:24:00,061 (trainer:737) INFO: 
22epoch:train:9001-9100batch: iter_time=6.086e-04, forward_time=0.405, loss_ctc=53.750, loss_att=52.271, acc=0.757, loss=52.715, backward_time=0.429, grad_norm=34.440, clip=100.000, loss_scale=8.619e+33, optim_step_time=0.109, optim0_lr0=2.152e-04, train_time=1.653 +[gpub058:0/16] 2024-02-03 01:26:36,705 (trainer:737) INFO: 22epoch:train:9101-9200batch: iter_time=7.822e-05, forward_time=0.389, loss_ctc=54.552, loss_att=53.830, acc=0.745, loss=54.047, backward_time=0.458, grad_norm=35.089, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.151e-04, train_time=1.567 +[gpub058:0/16] 2024-02-03 01:29:38,596 (trainer:737) INFO: 22epoch:train:9201-9300batch: iter_time=1.676e-04, forward_time=0.369, loss_ctc=52.281, loss_att=49.550, acc=0.757, loss=50.369, backward_time=0.467, grad_norm=34.899, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.151e-04, train_time=1.818 +[gpub058:0/16] 2024-02-03 01:31:49,938 (trainer:737) INFO: 22epoch:train:9301-9400batch: iter_time=7.846e-05, forward_time=0.293, loss_ctc=45.622, loss_att=46.130, acc=0.741, loss=45.977, backward_time=0.404, grad_norm=34.316, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.151e-04, train_time=1.314 +[gpub058:0/16] 2024-02-03 01:34:30,949 (trainer:737) INFO: 22epoch:train:9401-9500batch: iter_time=8.166e-05, forward_time=0.405, loss_ctc=53.151, loss_att=44.191, acc=0.751, loss=46.879, backward_time=0.427, grad_norm=38.780, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=2.150e-04, train_time=1.609 +[gpub058:0/16] 2024-02-03 01:37:04,742 (trainer:737) INFO: 22epoch:train:9501-9600batch: iter_time=8.798e-05, forward_time=0.296, loss_ctc=57.247, loss_att=58.903, acc=0.746, loss=58.407, backward_time=0.406, grad_norm=37.001, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.150e-04, train_time=1.539 +[gpub058:0/16] 2024-02-03 01:39:41,763 (trainer:737) INFO: 22epoch:train:9601-9700batch: 
iter_time=3.024e-04, forward_time=0.378, loss_ctc=47.933, loss_att=53.065, acc=0.739, loss=51.526, backward_time=0.449, grad_norm=35.860, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.150e-04, train_time=1.569 +[gpub058:0/16] 2024-02-03 01:41:56,670 (trainer:737) INFO: 22epoch:train:9701-9800batch: iter_time=7.750e-05, forward_time=0.290, loss_ctc=47.317, loss_att=48.845, acc=0.733, loss=48.387, backward_time=0.403, grad_norm=35.385, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.149e-04, train_time=1.347 +[gpub058:0/16] 2024-02-03 01:44:38,127 (trainer:737) INFO: 22epoch:train:9801-9900batch: iter_time=4.114e-04, forward_time=0.354, loss_ctc=54.118, loss_att=55.381, acc=0.743, loss=55.002, backward_time=0.484, grad_norm=38.307, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.149e-04, train_time=1.616 +[gpub058:0/16] 2024-02-03 01:47:15,366 (trainer:737) INFO: 22epoch:train:9901-10000batch: iter_time=8.621e-05, forward_time=0.293, loss_ctc=47.763, loss_att=44.134, acc=0.754, loss=45.223, backward_time=0.402, grad_norm=33.586, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.149e-04, train_time=1.572 +[gpub058:0/16] 2024-02-03 01:47:35,599 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub058:0/16] 2024-02-03 01:47:54,240 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 01:47:57,864 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 01:47:57,865 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub058:0/16] 2024-02-03 01:47:57,868 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 01:54:57,694 (trainer:737) INFO: 22epoch:train:10001-10100batch: iter_time=3.111, forward_time=0.384, loss_ctc=51.723, loss_att=57.270, acc=0.735, loss=55.606, backward_time=0.439, grad_norm=38.253, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.148e-04, train_time=4.624 +[gpub058:0/16] 2024-02-03 01:57:23,563 (trainer:737) INFO: 22epoch:train:10101-10200batch: iter_time=8.089e-05, forward_time=0.288, loss_ctc=46.786, loss_att=50.283, acc=0.731, loss=49.234, backward_time=0.404, grad_norm=33.774, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.148e-04, train_time=1.457 +[gpub058:0/16] 2024-02-03 02:00:06,778 (trainer:737) INFO: 22epoch:train:10201-10300batch: iter_time=8.189e-05, forward_time=0.298, loss_ctc=51.752, loss_att=45.335, acc=0.768, loss=47.260, backward_time=0.406, grad_norm=32.286, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.148e-04, train_time=1.634 +[gpub058:0/16] 2024-02-03 02:02:47,616 
(trainer:737) INFO: 22epoch:train:10301-10400batch: iter_time=8.612e-05, forward_time=0.424, loss_ctc=49.480, loss_att=52.527, acc=0.742, loss=51.613, backward_time=0.428, grad_norm=34.832, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.147e-04, train_time=1.608 +[gpub058:0/16] 2024-02-03 02:05:23,968 (trainer:737) INFO: 22epoch:train:10401-10500batch: iter_time=8.649e-05, forward_time=0.291, loss_ctc=54.893, loss_att=50.454, acc=0.757, loss=51.786, backward_time=0.405, grad_norm=37.263, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.147e-04, train_time=1.562 +[gpub058:0/16] 2024-02-03 02:08:00,523 (trainer:737) INFO: 22epoch:train:10501-10600batch: iter_time=8.532e-05, forward_time=0.410, loss_ctc=48.337, loss_att=48.260, acc=0.745, loss=48.283, backward_time=0.436, grad_norm=35.360, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.147e-04, train_time=1.567 +[gpub058:0/16] 2024-02-03 02:10:42,174 (trainer:737) INFO: 22epoch:train:10601-10700batch: iter_time=8.673e-05, forward_time=0.288, loss_ctc=51.698, loss_att=44.120, acc=0.750, loss=46.394, backward_time=0.402, grad_norm=38.163, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.146e-04, train_time=1.616 +[gpub058:0/16] 2024-02-03 02:13:04,355 (trainer:737) INFO: 22epoch:train:10701-10800batch: iter_time=8.389e-05, forward_time=0.333, loss_ctc=53.604, loss_att=49.082, acc=0.759, loss=50.439, backward_time=0.421, grad_norm=35.938, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.146e-04, train_time=1.421 +[gpub058:0/16] 2024-02-03 02:15:54,423 (trainer:737) INFO: 22epoch:train:10801-10900batch: iter_time=9.021e-05, forward_time=0.384, loss_ctc=49.008, loss_att=54.406, acc=0.744, loss=52.786, backward_time=0.423, grad_norm=33.964, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.146e-04, train_time=1.701 +[gpub058:0/16] 2024-02-03 02:18:20,796 (trainer:737) INFO: 
22epoch:train:10901-11000batch: iter_time=8.535e-05, forward_time=0.287, loss_ctc=49.829, loss_att=49.195, acc=0.734, loss=49.385, backward_time=0.400, grad_norm=37.442, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.145e-04, train_time=1.464 +[gpub058:0/16] 2024-02-03 02:20:17,061 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 02:20:49,844 (trainer:737) INFO: 22epoch:train:11001-11100batch: iter_time=8.282e-05, forward_time=0.354, loss_ctc=55.549, loss_att=56.173, acc=0.741, loss=55.986, backward_time=0.476, grad_norm=39.487, clip=100.000, loss_scale=1.458e+34, optim_step_time=0.102, optim0_lr0=2.145e-04, train_time=1.490 +[gpub058:0/16] 2024-02-03 02:21:14,432 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 02:23:25,304 (trainer:737) INFO: 22epoch:train:11101-11200batch: iter_time=9.115e-05, forward_time=0.289, loss_ctc=48.161, loss_att=48.919, acc=0.743, loss=48.692, backward_time=0.403, grad_norm=33.252, clip=100.000, loss_scale=6.136e+33, optim_step_time=0.091, optim0_lr0=2.145e-04, train_time=1.555 +[gpub058:0/16] 2024-02-03 02:24:53,219 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub058:0/16] 2024-02-03 02:25:12,724 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 02:25:16,295 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 02:25:16,296 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub058:0/16] 2024-02-03 02:25:16,299 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 02:30:54,566 (trainer:737) INFO: 22epoch:train:11201-11300batch: iter_time=3.060, forward_time=0.330, loss_ctc=49.675, loss_att=55.146, acc=0.737, loss=53.505, backward_time=0.411, grad_norm=37.522, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.144e-04, train_time=4.492 +[gpub058:0/16] 2024-02-03 02:33:41,864 (trainer:737) INFO: 22epoch:train:11301-11400batch: iter_time=7.636e-05, forward_time=0.330, loss_ctc=51.704, loss_att=56.672, acc=0.711, loss=55.181, backward_time=0.422, grad_norm=38.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.144e-04, train_time=1.672 +[gpub058:0/16] 2024-02-03 02:36:08,831 (trainer:737) INFO: 22epoch:train:11401-11500batch: iter_time=8.306e-05, forward_time=0.360, loss_ctc=41.820, loss_att=37.683, acc=0.757, loss=38.924, backward_time=0.428, grad_norm=29.525, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.144e-04, train_time=1.470 +[gpub058:0/16] 2024-02-03 02:39:01,612 (trainer:737) 
INFO: 22epoch:train:11501-11600batch: iter_time=9.158e-05, forward_time=0.323, loss_ctc=53.320, loss_att=51.718, acc=0.753, loss=52.198, backward_time=0.410, grad_norm=36.060, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.143e-04, train_time=1.728 +[gpub058:0/16] 2024-02-03 02:41:17,841 (trainer:737) INFO: 22epoch:train:11601-11700batch: iter_time=8.745e-05, forward_time=0.293, loss_ctc=54.218, loss_att=52.272, acc=0.743, loss=52.856, backward_time=0.406, grad_norm=35.885, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.143e-04, train_time=1.362 +[gpub058:0/16] 2024-02-03 02:44:04,831 (trainer:737) INFO: 22epoch:train:11701-11800batch: iter_time=4.196e-04, forward_time=0.316, loss_ctc=52.450, loss_att=47.782, acc=0.747, loss=49.183, backward_time=0.416, grad_norm=36.181, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.143e-04, train_time=1.668 +[gpub058:0/16] 2024-02-03 02:46:27,803 (trainer:737) INFO: 22epoch:train:11801-11900batch: iter_time=8.328e-05, forward_time=0.356, loss_ctc=45.977, loss_att=44.310, acc=0.737, loss=44.810, backward_time=0.414, grad_norm=34.076, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.142e-04, train_time=1.431 +[gpub058:0/16] 2024-02-03 02:49:16,193 (trainer:737) INFO: 22epoch:train:11901-12000batch: iter_time=8.476e-05, forward_time=0.288, loss_ctc=52.100, loss_att=43.602, acc=0.744, loss=46.152, backward_time=0.398, grad_norm=40.451, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.142e-04, train_time=1.683 +[gpub058:0/16] 2024-02-03 02:51:39,496 (trainer:737) INFO: 22epoch:train:12001-12100batch: iter_time=4.064e-04, forward_time=0.329, loss_ctc=56.963, loss_att=58.404, acc=0.737, loss=57.971, backward_time=0.462, grad_norm=39.344, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.142e-04, train_time=1.433 +[gpub058:0/16] 2024-02-03 02:54:05,061 (trainer:737) INFO: 
22epoch:train:12101-12200batch: iter_time=8.791e-05, forward_time=0.338, loss_ctc=47.564, loss_att=53.367, acc=0.732, loss=51.626, backward_time=0.418, grad_norm=36.520, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.141e-04, train_time=1.456 +[gpub058:0/16] 2024-02-03 02:56:55,343 (trainer:737) INFO: 22epoch:train:12201-12300batch: iter_time=1.027e-04, forward_time=0.313, loss_ctc=46.717, loss_att=48.209, acc=0.732, loss=47.762, backward_time=0.425, grad_norm=36.644, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.141e-04, train_time=1.702 +[gpub058:0/16] 2024-02-03 02:59:11,081 (trainer:737) INFO: 22epoch:train:12301-12400batch: iter_time=9.116e-05, forward_time=0.312, loss_ctc=53.995, loss_att=55.080, acc=0.738, loss=54.754, backward_time=0.414, grad_norm=37.800, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.141e-04, train_time=1.358 +[gpub058:0/16] 2024-02-03 03:02:04,814 (trainer:737) INFO: 22epoch:train:12401-12500batch: iter_time=8.637e-05, forward_time=0.327, loss_ctc=47.496, loss_att=44.960, acc=0.738, loss=45.721, backward_time=0.431, grad_norm=34.649, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.140e-04, train_time=1.736 +[gpub058:0/16] 2024-02-03 03:02:24,872 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub058:0/16] 2024-02-03 03:02:43,740 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 03:02:47,238 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 03:02:47,238 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub058:0/16] 2024-02-03 03:02:47,241 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 03:09:13,002 (trainer:737) INFO: 22epoch:train:12501-12600batch: iter_time=2.802, forward_time=0.324, loss_ctc=51.748, loss_att=58.705, acc=0.733, loss=56.618, backward_time=0.409, grad_norm=37.083, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.140e-04, train_time=4.282 +[gpub058:0/16] 2024-02-03 03:12:05,177 (trainer:737) INFO: 22epoch:train:12601-12700batch: iter_time=8.414e-05, forward_time=0.329, loss_ctc=46.801, loss_att=50.786, acc=0.730, loss=49.591, backward_time=0.416, grad_norm=130.987, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.140e-04, train_time=1.722 +[gpub058:0/16] 2024-02-03 03:14:29,086 (trainer:737) INFO: 22epoch:train:12701-12800batch: iter_time=8.360e-05, forward_time=0.375, loss_ctc=51.528, loss_att=45.076, acc=0.767, loss=47.011, backward_time=0.421, grad_norm=31.587, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.139e-04, train_time=1.439 +[gpub058:0/16] 2024-02-03 03:17:15,074 (trainer:737) 
INFO: 22epoch:train:12801-12900batch: iter_time=8.793e-05, forward_time=0.290, loss_ctc=49.206, loss_att=52.643, acc=0.741, loss=51.612, backward_time=0.403, grad_norm=36.493, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.139e-04, train_time=1.660 +[gpub058:0/16] 2024-02-03 03:19:51,241 (trainer:737) INFO: 22epoch:train:12901-13000batch: iter_time=1.046e-04, forward_time=0.351, loss_ctc=54.860, loss_att=50.896, acc=0.756, loss=52.085, backward_time=0.418, grad_norm=37.526, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.139e-04, train_time=1.562 +[gpub058:0/16] 2024-02-03 03:22:38,325 (trainer:737) INFO: 22epoch:train:13001-13100batch: iter_time=2.332e-04, forward_time=0.312, loss_ctc=48.788, loss_att=48.619, acc=0.746, loss=48.670, backward_time=0.435, grad_norm=35.273, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.138e-04, train_time=1.670 +[gpub058:0/16] 2024-02-03 03:25:08,371 (trainer:737) INFO: 22epoch:train:13101-13200batch: iter_time=8.872e-05, forward_time=0.289, loss_ctc=50.875, loss_att=44.430, acc=0.750, loss=46.364, backward_time=0.400, grad_norm=38.619, clip=100.000, loss_scale=9.398e+33, optim_step_time=0.091, optim0_lr0=2.138e-04, train_time=1.500 +[gpub058:0/16] 2024-02-03 03:27:47,154 (trainer:737) INFO: 22epoch:train:13201-13300batch: iter_time=1.180e-04, forward_time=0.351, loss_ctc=52.858, loss_att=48.543, acc=0.760, loss=49.837, backward_time=0.414, grad_norm=34.418, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.138e-04, train_time=1.588 +[gpub058:0/16] 2024-02-03 03:30:16,848 (trainer:737) INFO: 22epoch:train:13301-13400batch: iter_time=8.396e-05, forward_time=0.303, loss_ctc=49.060, loss_att=53.704, acc=0.746, loss=52.311, backward_time=0.442, grad_norm=37.799, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.138e-04, train_time=1.496 +[gpub058:0/16] 2024-02-03 03:32:41,460 (trainer:737) INFO: 
22epoch:train:13401-13500batch: iter_time=8.930e-05, forward_time=0.290, loss_ctc=49.287, loss_att=48.869, acc=0.737, loss=48.995, backward_time=0.403, grad_norm=36.542, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.137e-04, train_time=1.446 +[gpub058:0/16] 2024-02-03 03:35:36,710 (trainer:737) INFO: 22epoch:train:13501-13600batch: iter_time=9.990e-05, forward_time=0.349, loss_ctc=54.809, loss_att=56.411, acc=0.743, loss=55.931, backward_time=0.418, grad_norm=34.903, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.137e-04, train_time=1.752 +[gpub058:0/16] 2024-02-03 03:38:09,676 (trainer:737) INFO: 22epoch:train:13601-13700batch: iter_time=2.242e-04, forward_time=0.310, loss_ctc=47.673, loss_att=48.559, acc=0.744, loss=48.293, backward_time=0.443, grad_norm=34.067, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.137e-04, train_time=1.528 +[gpub058:0/16] 2024-02-03 03:39:48,161 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub058:0/16] 2024-02-03 03:40:07,004 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 03:40:10,522 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 03:40:10,522 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub058:0/16] 2024-02-03 03:40:10,525 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 03:45:48,627 (trainer:737) INFO: 22epoch:train:13701-13800batch: iter_time=3.061, forward_time=0.337, loss_ctc=50.152, loss_att=53.601, acc=0.746, loss=52.566, backward_time=0.409, grad_norm=36.162, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.136e-04, train_time=4.590 +[gpub058:0/16] 2024-02-03 03:47:02,743 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-03 03:48:23,932 (trainer:737) INFO: 22epoch:train:13801-13900batch: iter_time=7.960e-05, forward_time=0.330, loss_ctc=51.492, loss_att=55.862, acc=0.719, loss=54.551, backward_time=0.431, grad_norm=38.086, clip=100.000, loss_scale=7.710e+33, optim_step_time=0.096, optim0_lr0=2.136e-04, train_time=1.553 +[gpub058:0/16] 2024-02-03 03:51:03,367 (trainer:737) INFO: 22epoch:train:13901-14000batch: iter_time=8.045e-05, forward_time=0.288, loss_ctc=41.146, loss_att=36.387, acc=0.769, loss=37.815, backward_time=0.401, grad_norm=29.354, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.136e-04, train_time=1.592 +[gpub058:0/16] 2024-02-03 03:53:35,257 (trainer:737) INFO: 22epoch:train:14001-14100batch: iter_time=8.033e-05, forward_time=0.421, loss_ctc=52.815, loss_att=51.156, acc=0.759, loss=51.653, backward_time=0.433, grad_norm=35.543, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.135e-04, train_time=1.521 +[gpub058:0/16] 2024-02-03 03:56:03,598 (trainer:737) INFO: 22epoch:train:14101-14200batch: iter_time=7.994e-05, forward_time=0.291, loss_ctc=54.308, loss_att=52.523, acc=0.747, loss=53.059, backward_time=0.405, grad_norm=35.214, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.135e-04, train_time=1.484 +[gpub058:0/16] 2024-02-03 03:58:42,489 (trainer:737) INFO: 22epoch:train:14201-14300batch: iter_time=8.164e-05, forward_time=0.326, loss_ctc=52.227, loss_att=48.222, acc=0.760, loss=49.424, backward_time=0.413, grad_norm=35.670, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.135e-04, train_time=1.588 +[gpub058:0/16] 2024-02-03 04:01:19,509 (trainer:737) INFO: 22epoch:train:14301-14400batch: iter_time=2.542e-04, forward_time=0.336, loss_ctc=45.418, loss_att=45.666, acc=0.741, loss=45.591, backward_time=0.424, grad_norm=33.451, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.134e-04, train_time=1.570 +[gpub058:0/16] 2024-02-03 
04:03:29,767 (trainer:737) INFO: 22epoch:train:14401-14500batch: iter_time=7.956e-05, forward_time=0.288, loss_ctc=51.726, loss_att=42.976, acc=0.751, loss=45.601, backward_time=0.400, grad_norm=41.077, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.134e-04, train_time=1.303 +[gpub058:0/16] 2024-02-03 04:06:10,302 (trainer:737) INFO: 22epoch:train:14501-14600batch: iter_time=8.087e-05, forward_time=0.323, loss_ctc=56.438, loss_att=57.950, acc=0.750, loss=57.496, backward_time=0.421, grad_norm=37.255, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.134e-04, train_time=1.605 +[gpub058:0/16] 2024-02-03 04:08:44,586 (trainer:737) INFO: 22epoch:train:14601-14700batch: iter_time=4.724e-04, forward_time=0.311, loss_ctc=47.218, loss_att=52.884, acc=0.741, loss=51.184, backward_time=0.421, grad_norm=34.465, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.133e-04, train_time=1.543 +[gpub058:0/16] 2024-02-03 04:10:55,110 (trainer:737) INFO: 22epoch:train:14701-14800batch: iter_time=8.106e-05, forward_time=0.290, loss_ctc=46.451, loss_att=48.028, acc=0.736, loss=47.555, backward_time=0.402, grad_norm=33.503, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.133e-04, train_time=1.305 +[gpub058:0/16] 2024-02-03 04:13:44,423 (trainer:737) INFO: 22epoch:train:14801-14900batch: iter_time=3.924e-04, forward_time=0.371, loss_ctc=53.077, loss_att=54.845, acc=0.745, loss=54.315, backward_time=0.447, grad_norm=35.861, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.133e-04, train_time=1.692 +[gpub058:0/16] 2024-02-03 04:16:09,738 (trainer:737) INFO: 22epoch:train:14901-15000batch: iter_time=7.975e-05, forward_time=0.291, loss_ctc=47.528, loss_att=43.855, acc=0.755, loss=44.957, backward_time=0.403, grad_norm=33.806, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.090, optim0_lr0=2.132e-04, train_time=1.454 +[gpub058:0/16] 2024-02-03 04:53:13,170 (trainer:343) 
INFO: 22epoch results: [train] iter_time=0.247, forward_time=0.329, loss_ctc=51.350, loss_att=50.500, acc=0.740, loss=50.755, backward_time=0.419, grad_norm=37.038, clip=100.000, loss_scale=7.013e+33, optim_step_time=0.094, optim0_lr0=2.157e-04, train_time=1.852, time=7 hours, 43 minutes and 28.6 seconds, total_count=360000, gpu_max_cached_mem_GB=43.281, [valid] loss_ctc=39.810, cer_ctc=0.204, loss_att=39.841, acc=0.671, cer=0.327, wer=0.992, loss=39.831, time=36 minutes and 39.42 seconds, total_count=112104, gpu_max_cached_mem_GB=43.281 +[gpub058:0/16] 2024-02-03 04:53:28,993 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub058:0/16] 2024-02-03 04:53:29,098 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/14epoch.pth, exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/17epoch.pth +[gpub058:0/16] 2024-02-03 04:53:29,098 (trainer:272) INFO: 23/45epoch started. Estimated time to finish: 1 week, 23 hours and 49 minutes +[gpub058:0/16] 2024-02-03 04:53:29,108 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub058:0/16] 2024-02-03 04:53:46,653 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 04:53:49,993 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 04:53:49,993 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub058:0/16] 2024-02-03 04:53:49,996 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 05:00:18,603 (trainer:737) INFO: 23epoch:train:1-100batch: iter_time=2.657, forward_time=0.345, loss_ctc=48.675, loss_att=38.911, acc=0.768, loss=41.840, backward_time=0.412, grad_norm=38.161, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.132e-04, train_time=4.095 +[gpub058:0/16] 2024-02-03 05:02:45,266 (trainer:737) INFO: 23epoch:train:101-200batch: iter_time=8.325e-05, forward_time=0.351, loss_ctc=45.886, loss_att=49.549, acc=0.734, loss=48.450, backward_time=0.466, grad_norm=34.465, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.132e-04, train_time=1.466 +[gpub058:0/16] 2024-02-03 05:05:29,783 (trainer:737) INFO: 23epoch:train:201-300batch: iter_time=2.205e-04, forward_time=0.376, loss_ctc=57.730, loss_att=46.255, acc=0.757, loss=49.697, backward_time=0.453, grad_norm=43.856, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.131e-04, train_time=1.645 +[gpub058:0/16] 2024-02-03 05:07:54,621 (trainer:737) INFO: 
23epoch:train:301-400batch: iter_time=7.955e-05, forward_time=0.308, loss_ctc=44.920, loss_att=49.606, acc=0.728, loss=48.200, backward_time=0.420, grad_norm=37.150, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.131e-04, train_time=1.448 +[gpub058:0/16] 2024-02-03 05:10:25,142 (trainer:737) INFO: 23epoch:train:401-500batch: iter_time=8.253e-05, forward_time=0.319, loss_ctc=61.257, loss_att=59.983, acc=0.727, loss=60.366, backward_time=0.416, grad_norm=49.477, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.131e-04, train_time=1.505 +[gpub058:0/16] 2024-02-03 05:13:02,591 (trainer:737) INFO: 23epoch:train:501-600batch: iter_time=5.192e-04, forward_time=0.361, loss_ctc=53.167, loss_att=47.817, acc=0.761, loss=49.422, backward_time=0.453, grad_norm=35.013, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.130e-04, train_time=1.574 +[gpub058:0/16] 2024-02-03 05:15:30,555 (trainer:737) INFO: 23epoch:train:601-700batch: iter_time=5.661e-04, forward_time=0.367, loss_ctc=51.050, loss_att=52.503, acc=0.739, loss=52.067, backward_time=0.442, grad_norm=36.419, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.130e-04, train_time=1.480 +[gpub058:0/16] 2024-02-03 05:18:11,254 (trainer:737) INFO: 23epoch:train:701-800batch: iter_time=8.446e-05, forward_time=0.338, loss_ctc=50.481, loss_att=42.897, acc=0.757, loss=45.172, backward_time=0.454, grad_norm=35.924, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.130e-04, train_time=1.607 +[gpub058:0/16] 2024-02-03 05:20:47,776 (trainer:737) INFO: 23epoch:train:801-900batch: iter_time=4.800e-04, forward_time=0.316, loss_ctc=55.886, loss_att=47.281, acc=0.746, loss=49.863, backward_time=0.423, grad_norm=44.063, clip=100.000, loss_scale=7.840e+33, optim_step_time=0.096, optim0_lr0=2.129e-04, train_time=1.565 +[gpub058:0/16] 2024-02-03 05:23:16,817 (trainer:737) INFO: 23epoch:train:901-1000batch: iter_time=7.480e-04, 
forward_time=0.365, loss_ctc=50.822, loss_att=53.939, acc=0.747, loss=53.004, backward_time=0.446, grad_norm=35.068, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.129e-04, train_time=1.490 +[gpub058:0/16] 2024-02-03 05:25:48,680 (trainer:737) INFO: 23epoch:train:1001-1100batch: iter_time=4.711e-04, forward_time=0.359, loss_ctc=48.883, loss_att=47.305, acc=0.737, loss=47.778, backward_time=0.451, grad_norm=36.612, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.129e-04, train_time=1.519 +[gpub058:0/16] 2024-02-03 05:28:38,486 (trainer:737) INFO: 23epoch:train:1101-1200batch: iter_time=0.001, forward_time=0.356, loss_ctc=61.897, loss_att=65.780, acc=0.701, loss=64.615, backward_time=0.441, grad_norm=48.801, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.128e-04, train_time=1.698 +[gpub058:0/16] 2024-02-03 05:30:14,713 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub058:0/16] 2024-02-03 05:30:33,659 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 05:30:37,275 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 05:30:37,275 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub058:0/16] 2024-02-03 05:30:37,279 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 05:36:10,430 (trainer:737) INFO: 
23epoch:train:1201-1300batch: iter_time=3.038, forward_time=0.348, loss_ctc=44.341, loss_att=37.322, acc=0.761, loss=39.428, backward_time=0.418, grad_norm=35.045, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.128e-04, train_time=4.520 +[gpub058:0/16] 2024-02-03 05:37:30,573 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 05:38:35,542 (trainer:737) INFO: 23epoch:train:1301-1400batch: iter_time=7.993e-05, forward_time=0.324, loss_ctc=46.230, loss_att=42.961, acc=0.748, loss=43.942, backward_time=0.412, grad_norm=36.547, clip=100.000, loss_scale=8.234e+33, optim_step_time=0.093, optim0_lr0=2.128e-04, train_time=1.450 +[gpub058:0/16] 2024-02-03 05:41:11,618 (trainer:737) INFO: 23epoch:train:1401-1500batch: iter_time=8.753e-05, forward_time=0.346, loss_ctc=54.703, loss_att=52.927, acc=0.746, loss=53.459, backward_time=0.430, grad_norm=39.896, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.128e-04, train_time=1.560 +[gpub058:0/16] 2024-02-03 05:43:25,030 (trainer:737) INFO: 23epoch:train:1501-1600batch: iter_time=9.607e-05, forward_time=0.290, loss_ctc=45.957, loss_att=46.148, acc=0.749, loss=46.091, backward_time=0.403, grad_norm=35.084, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.127e-04, train_time=1.335 +[gpub058:0/16] 2024-02-03 05:45:47,956 (trainer:737) INFO: 23epoch:train:1601-1700batch: iter_time=9.147e-05, forward_time=0.318, loss_ctc=47.807, loss_att=51.435, acc=0.725, loss=50.346, backward_time=0.418, grad_norm=36.134, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.127e-04, train_time=1.429 +[gpub058:0/16] 2024-02-03 05:48:26,955 (trainer:737) INFO: 23epoch:train:1701-1800batch: iter_time=8.991e-05, forward_time=0.329, loss_ctc=58.813, loss_att=54.698, acc=0.751, loss=55.933, backward_time=0.417, grad_norm=47.770, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.127e-04, 
train_time=1.589 +[gpub058:0/16] 2024-02-03 05:50:52,027 (trainer:737) INFO: 23epoch:train:1801-1900batch: iter_time=9.214e-05, forward_time=0.331, loss_ctc=49.506, loss_att=47.621, acc=0.753, loss=48.186, backward_time=0.419, grad_norm=32.265, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.126e-04, train_time=1.450 +[gpub058:0/16] 2024-02-03 05:53:32,329 (trainer:737) INFO: 23epoch:train:1901-2000batch: iter_time=8.735e-05, forward_time=0.327, loss_ctc=50.877, loss_att=47.230, acc=0.749, loss=48.324, backward_time=0.410, grad_norm=35.214, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.126e-04, train_time=1.604 +[gpub058:0/16] 2024-02-03 05:55:57,176 (trainer:737) INFO: 23epoch:train:2001-2100batch: iter_time=8.986e-05, forward_time=0.308, loss_ctc=53.709, loss_att=44.980, acc=0.759, loss=47.598, backward_time=0.411, grad_norm=38.431, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.126e-04, train_time=1.448 +[gpub058:0/16] 2024-02-03 05:58:23,072 (trainer:737) INFO: 23epoch:train:2101-2200batch: iter_time=1.027e-04, forward_time=0.336, loss_ctc=53.783, loss_att=54.688, acc=0.738, loss=54.416, backward_time=0.429, grad_norm=37.919, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.125e-04, train_time=1.458 +[gpub058:0/16] 2024-02-03 06:00:56,544 (trainer:737) INFO: 23epoch:train:2201-2300batch: iter_time=8.169e-05, forward_time=0.316, loss_ctc=45.687, loss_att=45.971, acc=0.754, loss=45.886, backward_time=0.406, grad_norm=31.252, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.125e-04, train_time=1.534 +[gpub058:0/16] 2024-02-03 06:03:21,161 (trainer:737) INFO: 23epoch:train:2301-2400batch: iter_time=8.976e-05, forward_time=0.309, loss_ctc=59.312, loss_att=56.915, acc=0.716, loss=57.634, backward_time=0.411, grad_norm=43.625, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.125e-04, train_time=1.447 +[gpub058:0/16] 2024-02-03 
06:05:54,295 (trainer:737) INFO: 23epoch:train:2401-2500batch: iter_time=8.717e-05, forward_time=0.348, loss_ctc=45.848, loss_att=50.824, acc=0.728, loss=49.331, backward_time=0.421, grad_norm=38.792, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.124e-04, train_time=1.530 +[gpub058:0/16] 2024-02-03 06:06:14,493 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub058:0/16] 2024-02-03 06:06:33,044 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 06:06:36,533 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 06:06:36,533 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-03 06:06:36,573 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 06:13:22,739 (trainer:737) INFO: 23epoch:train:2501-2600batch: iter_time=2.925, forward_time=0.384, loss_ctc=47.007, loss_att=40.988, acc=0.754, loss=42.794, backward_time=0.421, grad_norm=38.397, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.124e-04, train_time=4.485 +[gpub058:0/16] 2024-02-03 06:15:52,397 (trainer:737) INFO: 23epoch:train:2601-2700batch: iter_time=8.190e-05, forward_time=0.330, loss_ctc=44.493, loss_att=48.528, acc=0.732, loss=47.317, backward_time=0.409, grad_norm=33.229, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.124e-04, 
train_time=1.496 +[gpub058:0/16] 2024-02-03 06:18:16,438 (trainer:737) INFO: 23epoch:train:2701-2800batch: iter_time=8.206e-05, forward_time=0.319, loss_ctc=52.554, loss_att=47.571, acc=0.746, loss=49.066, backward_time=0.459, grad_norm=42.451, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.123e-04, train_time=1.441 +[gpub058:0/16] 2024-02-03 06:21:04,294 (trainer:737) INFO: 23epoch:train:2801-2900batch: iter_time=9.180e-05, forward_time=0.325, loss_ctc=43.902, loss_att=49.065, acc=0.729, loss=47.516, backward_time=0.400, grad_norm=35.526, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.123e-04, train_time=1.678 +[gpub058:0/16] 2024-02-03 06:23:29,796 (trainer:737) INFO: 23epoch:train:2901-3000batch: iter_time=8.959e-05, forward_time=0.337, loss_ctc=59.261, loss_att=57.702, acc=0.723, loss=58.170, backward_time=0.462, grad_norm=53.841, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.123e-04, train_time=1.454 +[gpub058:0/16] 2024-02-03 06:25:59,420 (trainer:737) INFO: 23epoch:train:3001-3100batch: iter_time=9.289e-05, forward_time=0.304, loss_ctc=51.844, loss_att=48.126, acc=0.749, loss=49.242, backward_time=0.406, grad_norm=35.373, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.122e-04, train_time=1.497 +[gpub058:0/16] 2024-02-03 06:28:39,274 (trainer:737) INFO: 23epoch:train:3101-3200batch: iter_time=9.075e-05, forward_time=0.347, loss_ctc=50.173, loss_att=50.581, acc=0.735, loss=50.459, backward_time=0.423, grad_norm=34.861, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.122e-04, train_time=1.598 +[gpub058:0/16] 2024-02-03 06:31:00,704 (trainer:737) INFO: 23epoch:train:3201-3300batch: iter_time=8.518e-05, forward_time=0.351, loss_ctc=48.529, loss_att=41.865, acc=0.760, loss=43.864, backward_time=0.416, grad_norm=36.544, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.122e-04, train_time=1.414 +[gpub058:0/16] 2024-02-03 
06:33:42,771 (trainer:737) INFO: 23epoch:train:3301-3400batch: iter_time=9.129e-05, forward_time=0.316, loss_ctc=53.505, loss_att=44.814, acc=0.754, loss=47.421, backward_time=0.433, grad_norm=47.057, clip=100.000, loss_scale=7.321e+33, optim_step_time=0.094, optim0_lr0=2.121e-04, train_time=1.621 +[gpub058:0/16] 2024-02-03 06:36:19,245 (trainer:737) INFO: 23epoch:train:3401-3500batch: iter_time=8.467e-05, forward_time=0.372, loss_ctc=49.996, loss_att=53.699, acc=0.745, loss=52.588, backward_time=0.415, grad_norm=34.859, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.121e-04, train_time=1.564 +[gpub058:0/16] 2024-02-03 06:39:02,498 (trainer:737) INFO: 23epoch:train:3501-3600batch: iter_time=9.442e-05, forward_time=0.342, loss_ctc=47.484, loss_att=47.010, acc=0.737, loss=47.152, backward_time=0.410, grad_norm=35.697, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.121e-04, train_time=1.633 +[gpub058:0/16] 2024-02-03 06:41:25,546 (trainer:737) INFO: 23epoch:train:3601-3700batch: iter_time=4.998e-04, forward_time=0.344, loss_ctc=60.367, loss_att=63.216, acc=0.703, loss=62.361, backward_time=0.424, grad_norm=46.216, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.120e-04, train_time=1.430 +[gpub058:0/16] 2024-02-03 06:42:50,350 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub058:0/16] 2024-02-03 06:43:08,989 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 06:43:12,547 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 06:43:12,547 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub058:0/16] 2024-02-03 06:43:12,550 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 06:48:58,420 (trainer:737) INFO: 23epoch:train:3701-3800batch: iter_time=3.116, forward_time=0.287, loss_ctc=43.119, loss_att=37.469, acc=0.762, loss=39.164, backward_time=0.402, grad_norm=36.154, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.120e-04, train_time=4.528 +[gpub058:0/16] 2024-02-03 06:50:49,843 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-03 06:51:31,389 (trainer:737) INFO: 23epoch:train:3801-3900batch: iter_time=2.734e-04, forward_time=0.406, loss_ctc=45.700, loss_att=43.597, acc=0.748, loss=44.228, backward_time=0.441, grad_norm=36.633, clip=100.000, loss_scale=8.706e+33, optim_step_time=0.099, optim0_lr0=2.120e-04, train_time=1.529 +[gpub058:0/16] 2024-02-03 06:54:10,145 (trainer:737) INFO: 23epoch:train:3901-4000batch: iter_time=8.373e-05, forward_time=0.290, loss_ctc=53.533, loss_att=53.411, acc=0.747, loss=53.448, backward_time=0.404, grad_norm=41.603, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.120e-04, train_time=1.588 +[gpub058:0/16] 2024-02-03 06:56:54,487 (trainer:737) INFO: 23epoch:train:4001-4100batch: iter_time=9.821e-05, forward_time=0.417, loss_ctc=45.640, loss_att=46.120, acc=0.752, loss=45.976, backward_time=0.427, grad_norm=33.993, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.119e-04, train_time=1.643 +[gpub058:0/16] 2024-02-03 06:59:10,439 (trainer:737) INFO: 23epoch:train:4101-4200batch: iter_time=9.241e-05, forward_time=0.289, loss_ctc=46.960, loss_att=51.335, acc=0.728, loss=50.022, backward_time=0.404, grad_norm=37.058, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.119e-04, train_time=1.359 +[gpub058:0/16] 2024-02-03 07:01:45,158 (trainer:737) INFO: 23epoch:train:4201-4300batch: iter_time=8.915e-05, forward_time=0.292, loss_ctc=58.358, loss_att=55.705, acc=0.752, loss=56.501, backward_time=0.406, grad_norm=47.202, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.119e-04, train_time=1.548 +[gpub058:0/16] 2024-02-03 07:04:29,008 (trainer:737) INFO: 23epoch:train:4301-4400batch: iter_time=8.590e-05, forward_time=0.379, loss_ctc=48.343, loss_att=47.139, acc=0.758, loss=47.500, backward_time=0.462, grad_norm=31.400, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.118e-04, train_time=1.638 +[gpub058:0/16] 2024-02-03 07:06:59,686 
(trainer:737) INFO: 23epoch:train:4401-4500batch: iter_time=8.382e-05, forward_time=0.289, loss_ctc=50.454, loss_att=47.068, acc=0.751, loss=48.084, backward_time=0.402, grad_norm=37.204, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.118e-04, train_time=1.507 +[gpub058:0/16] 2024-02-03 07:09:35,198 (trainer:737) INFO: 23epoch:train:4501-4600batch: iter_time=9.267e-05, forward_time=0.290, loss_ctc=53.104, loss_att=44.300, acc=0.761, loss=46.941, backward_time=0.404, grad_norm=38.836, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.118e-04, train_time=1.554 +[gpub058:0/16] 2024-02-03 07:12:05,321 (trainer:737) INFO: 23epoch:train:4601-4700batch: iter_time=4.562e-04, forward_time=0.409, loss_ctc=52.534, loss_att=53.918, acc=0.741, loss=53.503, backward_time=0.420, grad_norm=37.643, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.117e-04, train_time=1.501 +[gpub058:0/16] 2024-02-03 07:14:50,431 (trainer:737) INFO: 23epoch:train:4701-4800batch: iter_time=9.082e-05, forward_time=0.323, loss_ctc=45.391, loss_att=45.851, acc=0.755, loss=45.713, backward_time=0.416, grad_norm=31.343, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.117e-04, train_time=1.651 +[gpub058:0/16] 2024-02-03 07:17:06,947 (trainer:737) INFO: 23epoch:train:4801-4900batch: iter_time=8.539e-05, forward_time=0.291, loss_ctc=57.959, loss_att=56.451, acc=0.718, loss=56.903, backward_time=0.406, grad_norm=42.835, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.117e-04, train_time=1.364 +[gpub058:0/16] 2024-02-03 07:19:50,651 (trainer:737) INFO: 23epoch:train:4901-5000batch: iter_time=8.657e-05, forward_time=0.356, loss_ctc=45.005, loss_att=50.952, acc=0.729, loss=49.168, backward_time=0.415, grad_norm=36.871, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.116e-04, train_time=1.637 +[gpub058:0/16] 2024-02-03 07:20:10,679 (multiple_iter_factory:32) INFO: Building 
4th iter-factory... +[gpub058:0/16] 2024-02-03 07:20:29,717 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 07:20:33,610 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 07:20:33,610 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub058:0/16] 2024-02-03 07:20:33,613 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 07:27:25,966 (trainer:737) INFO: 23epoch:train:5001-5100batch: iter_time=3.126, forward_time=0.332, loss_ctc=46.601, loss_att=37.950, acc=0.773, loss=40.545, backward_time=0.413, grad_norm=34.459, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.116e-04, train_time=4.553 +[gpub058:0/16] 2024-02-03 07:30:05,687 (trainer:737) INFO: 23epoch:train:5101-5200batch: iter_time=8.513e-05, forward_time=0.355, loss_ctc=43.960, loss_att=49.003, acc=0.740, loss=47.490, backward_time=0.405, grad_norm=33.399, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.116e-04, train_time=1.597 +[gpub058:0/16] 2024-02-03 07:32:39,410 (trainer:737) INFO: 23epoch:train:5201-5300batch: iter_time=8.114e-05, forward_time=0.308, loss_ctc=52.702, loss_att=45.907, acc=0.763, loss=47.946, backward_time=0.470, grad_norm=41.395, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.115e-04, train_time=1.537 +[gpub058:0/16] 2024-02-03 
07:35:22,549 (trainer:737) INFO: 23epoch:train:5301-5400batch: iter_time=8.295e-05, forward_time=0.288, loss_ctc=43.336, loss_att=47.760, acc=0.737, loss=46.433, backward_time=0.402, grad_norm=34.297, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.115e-04, train_time=1.631 +[gpub058:0/16] 2024-02-03 07:37:55,614 (trainer:737) INFO: 23epoch:train:5401-5500batch: iter_time=9.229e-05, forward_time=0.360, loss_ctc=56.256, loss_att=58.962, acc=0.732, loss=58.150, backward_time=0.475, grad_norm=43.907, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.115e-04, train_time=1.530 +[gpub058:0/16] 2024-02-03 07:40:43,807 (trainer:737) INFO: 23epoch:train:5501-5600batch: iter_time=1.051e-04, forward_time=0.291, loss_ctc=51.651, loss_att=46.295, acc=0.770, loss=47.902, backward_time=0.405, grad_norm=33.469, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.114e-04, train_time=1.682 +[gpub058:0/16] 2024-02-03 07:43:07,727 (trainer:737) INFO: 23epoch:train:5601-5700batch: iter_time=8.195e-05, forward_time=0.364, loss_ctc=49.778, loss_att=51.610, acc=0.742, loss=51.061, backward_time=0.412, grad_norm=32.890, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.114e-04, train_time=1.439 +[gpub058:0/16] 2024-02-03 07:45:45,537 (trainer:737) INFO: 23epoch:train:5701-5800batch: iter_time=8.749e-05, forward_time=0.311, loss_ctc=47.851, loss_att=41.233, acc=0.766, loss=43.218, backward_time=0.446, grad_norm=34.604, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.114e-04, train_time=1.577 +[gpub058:0/16] 2024-02-03 07:48:20,539 (trainer:737) INFO: 23epoch:train:5801-5900batch: iter_time=9.100e-05, forward_time=0.291, loss_ctc=53.371, loss_att=46.389, acc=0.753, loss=48.484, backward_time=0.405, grad_norm=39.072, clip=100.000, loss_scale=6.854e+33, optim_step_time=0.092, optim0_lr0=2.114e-04, train_time=1.550 +[gpub058:0/16] 2024-02-03 07:50:58,815 (trainer:737) INFO: 
23epoch:train:5901-6000batch: iter_time=8.510e-05, forward_time=0.339, loss_ctc=49.083, loss_att=53.002, acc=0.753, loss=51.826, backward_time=0.423, grad_norm=35.392, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.113e-04, train_time=1.583 +[gpub058:0/16] 2024-02-03 07:53:14,443 (trainer:737) INFO: 23epoch:train:6001-6100batch: iter_time=9.282e-05, forward_time=0.289, loss_ctc=47.612, loss_att=46.412, acc=0.743, loss=46.772, backward_time=0.403, grad_norm=35.477, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.113e-04, train_time=1.356 +[gpub058:0/16] 2024-02-03 07:56:15,835 (trainer:737) INFO: 23epoch:train:6101-6200batch: iter_time=8.948e-05, forward_time=0.293, loss_ctc=59.393, loss_att=64.002, acc=0.707, loss=62.619, backward_time=0.407, grad_norm=45.644, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.113e-04, train_time=1.814 +[gpub058:0/16] 2024-02-03 07:57:54,876 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub058:0/16] 2024-02-03 07:58:14,008 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 07:58:17,927 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 07:58:17,927 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub058:0/16] 2024-02-03 07:58:17,930 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 08:03:58,208 (trainer:737) INFO: 23epoch:train:6201-6300batch: iter_time=3.110, forward_time=0.381, loss_ctc=42.941, loss_att=37.469, acc=0.764, loss=39.110, backward_time=0.443, grad_norm=35.328, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.112e-04, train_time=4.623 +[gpub058:0/16] 2024-02-03 08:06:38,213 (trainer:737) INFO: 23epoch:train:6301-6400batch: iter_time=2.536e-04, forward_time=0.330, loss_ctc=44.702, loss_att=44.591, acc=0.734, loss=44.625, backward_time=0.405, grad_norm=38.617, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.112e-04, train_time=1.599 +[gpub058:0/16] 2024-02-03 08:08:23,368 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-03 08:09:19,501 (trainer:737) INFO: 23epoch:train:6401-6500batch: iter_time=0.001, forward_time=0.388, loss_ctc=52.479, loss_att=52.195, acc=0.747, loss=52.280, backward_time=0.424, grad_norm=41.081, clip=100.000, loss_scale=8.759e+33, optim_step_time=0.094, optim0_lr0=2.112e-04, train_time=1.613 +[gpub058:0/16] 2024-02-03 08:11:36,089 (trainer:737) INFO: 23epoch:train:6501-6600batch: iter_time=8.831e-05, forward_time=0.289, loss_ctc=44.734, loss_att=46.825, acc=0.743, loss=46.198, backward_time=0.403, grad_norm=34.953, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.111e-04, train_time=1.366 +[gpub058:0/16] 2024-02-03 08:14:24,549 (trainer:737) INFO: 23epoch:train:6601-6700batch: iter_time=3.664e-04, forward_time=0.375, loss_ctc=46.478, loss_att=48.809, acc=0.730, loss=48.110, backward_time=0.420, grad_norm=40.809, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.111e-04, train_time=1.684 +[gpub058:0/16] 2024-02-03 08:16:44,015 (trainer:737) INFO: 23epoch:train:6701-6800batch: iter_time=9.023e-05, forward_time=0.292, loss_ctc=56.880, loss_att=56.872, acc=0.738, loss=56.875, backward_time=0.406, grad_norm=48.068, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.111e-04, train_time=1.395 +[gpub058:0/16] 2024-02-03 08:19:09,689 (trainer:737) INFO: 23epoch:train:6801-6900batch: iter_time=9.810e-05, forward_time=0.290, loss_ctc=48.515, loss_att=46.516, acc=0.749, loss=47.115, backward_time=0.404, grad_norm=33.780, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.110e-04, train_time=1.457 +[gpub058:0/16] 2024-02-03 08:22:01,086 (trainer:737) INFO: 23epoch:train:6901-7000batch: iter_time=9.444e-05, forward_time=0.354, loss_ctc=49.944, loss_att=47.110, acc=0.747, loss=47.960, backward_time=0.480, grad_norm=37.399, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.110e-04, train_time=1.713 +[gpub058:0/16] 2024-02-03 08:24:15,174 
(trainer:737) INFO: 23epoch:train:7001-7100batch: iter_time=9.158e-05, forward_time=0.291, loss_ctc=52.235, loss_att=43.790, acc=0.764, loss=46.324, backward_time=0.405, grad_norm=40.832, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.110e-04, train_time=1.341 +[gpub058:0/16] 2024-02-03 08:27:00,077 (trainer:737) INFO: 23epoch:train:7101-7200batch: iter_time=9.847e-05, forward_time=0.419, loss_ctc=52.127, loss_att=52.346, acc=0.736, loss=52.280, backward_time=0.423, grad_norm=37.436, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.109e-04, train_time=1.649 +[gpub058:0/16] 2024-02-03 08:29:33,325 (trainer:737) INFO: 23epoch:train:7201-7300batch: iter_time=9.375e-05, forward_time=0.289, loss_ctc=45.139, loss_att=45.854, acc=0.753, loss=45.639, backward_time=0.403, grad_norm=31.659, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.109e-04, train_time=1.532 +[gpub058:0/16] 2024-02-03 08:32:16,751 (trainer:737) INFO: 23epoch:train:7301-7400batch: iter_time=3.396e-04, forward_time=0.394, loss_ctc=57.735, loss_att=56.133, acc=0.716, loss=56.613, backward_time=0.445, grad_norm=45.415, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=2.109e-04, train_time=1.634 +[gpub058:0/16] 2024-02-03 08:34:35,099 (trainer:737) INFO: 23epoch:train:7401-7500batch: iter_time=8.331e-05, forward_time=0.289, loss_ctc=44.955, loss_att=50.288, acc=0.721, loss=48.688, backward_time=0.401, grad_norm=39.371, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.109e-04, train_time=1.383 +[gpub058:0/16] 2024-02-03 08:34:55,128 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub058:0/16] 2024-02-03 08:35:14,078 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 08:35:17,655 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 08:35:17,655 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub058:0/16] 2024-02-03 08:35:17,658 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 08:42:12,843 (trainer:737) INFO: 23epoch:train:7501-7600batch: iter_time=3.012, forward_time=0.424, loss_ctc=45.417, loss_att=38.268, acc=0.762, loss=40.413, backward_time=0.421, grad_norm=39.848, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.108e-04, train_time=4.577 +[gpub058:0/16] 2024-02-03 08:44:33,355 (trainer:737) INFO: 23epoch:train:7601-7700batch: iter_time=8.293e-05, forward_time=0.288, loss_ctc=44.116, loss_att=47.915, acc=0.734, loss=46.775, backward_time=0.401, grad_norm=33.900, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.108e-04, train_time=1.405 +[gpub058:0/16] 2024-02-03 08:46:33,922 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-03 08:47:15,556 (trainer:737) INFO: 23epoch:train:7701-7800batch: iter_time=3.212e-04, forward_time=0.319, loss_ctc=52.228, loss_att=48.645, acc=0.750, loss=49.720, backward_time=0.424, grad_norm=42.865, clip=100.000, loss_scale=4.851e+33, optim_step_time=0.100, optim0_lr0=2.108e-04, train_time=1.622 +[gpub058:0/16] 2024-02-03 08:49:37,690 (trainer:737) INFO: 23epoch:train:7801-7900batch: iter_time=8.386e-05, forward_time=0.288, loss_ctc=42.803, loss_att=47.083, acc=0.739, loss=45.799, backward_time=0.402, grad_norm=34.061, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.107e-04, train_time=1.421 +[gpub058:0/16] 2024-02-03 08:52:01,216 (trainer:737) INFO: 23epoch:train:7901-8000batch: iter_time=9.791e-05, forward_time=0.290, loss_ctc=55.992, loss_att=57.118, acc=0.727, loss=56.780, backward_time=0.404, grad_norm=43.116, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.107e-04, train_time=1.435 +[gpub058:0/16] 2024-02-03 08:54:35,556 (trainer:737) INFO: 23epoch:train:8001-8100batch: iter_time=8.756e-05, forward_time=0.291, loss_ctc=51.431, loss_att=47.426, acc=0.753, loss=48.628, backward_time=0.405, grad_norm=35.343, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.107e-04, train_time=1.543 +[gpub058:0/16] 2024-02-03 08:57:29,256 (trainer:737) INFO: 23epoch:train:8101-8200batch: iter_time=8.970e-05, forward_time=0.397, loss_ctc=49.364, loss_att=50.439, acc=0.738, loss=50.116, backward_time=0.425, grad_norm=34.197, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.106e-04, train_time=1.736 +[gpub058:0/16] 2024-02-03 09:00:04,172 (trainer:737) INFO: 23epoch:train:8201-8300batch: iter_time=8.590e-05, forward_time=0.287, loss_ctc=47.800, loss_att=41.145, acc=0.764, loss=43.142, backward_time=0.399, grad_norm=35.069, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.106e-04, train_time=1.549 +[gpub058:0/16] 2024-02-03 09:02:39,167 
(trainer:737) INFO: 23epoch:train:8301-8400batch: iter_time=9.282e-05, forward_time=0.298, loss_ctc=52.418, loss_att=44.165, acc=0.757, loss=46.641, backward_time=0.405, grad_norm=38.836, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.106e-04, train_time=1.550 +[gpub058:0/16] 2024-02-03 09:05:22,678 (trainer:737) INFO: 23epoch:train:8401-8500batch: iter_time=8.872e-05, forward_time=0.377, loss_ctc=48.722, loss_att=52.823, acc=0.749, loss=51.593, backward_time=0.435, grad_norm=34.460, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.105e-04, train_time=1.635 +[gpub058:0/16] 2024-02-03 09:07:39,636 (trainer:737) INFO: 23epoch:train:8501-8600batch: iter_time=8.577e-05, forward_time=0.288, loss_ctc=47.146, loss_att=46.618, acc=0.739, loss=46.777, backward_time=0.401, grad_norm=35.309, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.105e-04, train_time=1.369 +[gpub058:0/16] 2024-02-03 09:10:25,362 (trainer:737) INFO: 23epoch:train:8601-8700batch: iter_time=8.359e-05, forward_time=0.292, loss_ctc=58.526, loss_att=62.375, acc=0.704, loss=61.220, backward_time=0.406, grad_norm=44.867, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.105e-04, train_time=1.657 +[gpub058:0/16] 2024-02-03 09:12:07,716 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub058:0/16] 2024-02-03 09:12:26,863 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 09:12:30,501 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 09:12:30,502 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub058:0/16] 2024-02-03 09:12:30,505 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 09:18:18,224 (trainer:737) INFO: 23epoch:train:8701-8800batch: iter_time=3.180, forward_time=0.368, loss_ctc=42.549, loss_att=37.751, acc=0.765, loss=39.191, backward_time=0.466, grad_norm=35.566, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=2.104e-04, train_time=4.728 +[gpub058:0/16] 2024-02-03 09:20:43,182 (trainer:737) INFO: 23epoch:train:8801-8900batch: iter_time=7.316e-05, forward_time=0.288, loss_ctc=44.568, loss_att=43.168, acc=0.754, loss=43.588, backward_time=0.401, grad_norm=36.856, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.104e-04, train_time=1.449 +[gpub058:0/16] 2024-02-03 09:23:36,758 (trainer:737) INFO: 23epoch:train:8901-9000batch: iter_time=8.170e-05, forward_time=0.390, loss_ctc=52.024, loss_att=53.554, acc=0.747, loss=53.095, backward_time=0.453, grad_norm=43.161, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.104e-04, train_time=1.736 +[gpub058:0/16] 2024-02-03 09:25:56,400 (trainer:737) INFO: 
23epoch:train:9001-9100batch: iter_time=8.444e-05, forward_time=0.290, loss_ctc=44.873, loss_att=45.483, acc=0.756, loss=45.300, backward_time=0.403, grad_norm=35.606, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.104e-04, train_time=1.396 +[gpub058:0/16] 2024-02-03 09:28:28,452 (trainer:737) INFO: 23epoch:train:9101-9200batch: iter_time=8.976e-05, forward_time=0.346, loss_ctc=46.172, loss_att=51.522, acc=0.728, loss=49.917, backward_time=0.420, grad_norm=37.349, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.103e-04, train_time=1.520 +[gpub058:0/16] 2024-02-03 09:31:02,885 (trainer:737) INFO: 23epoch:train:9201-9300batch: iter_time=8.624e-05, forward_time=0.353, loss_ctc=57.149, loss_att=54.577, acc=0.757, loss=55.348, backward_time=0.419, grad_norm=48.652, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.103e-04, train_time=1.544 +[gpub058:0/16] 2024-02-03 09:33:35,067 (trainer:737) INFO: 23epoch:train:9301-9400batch: iter_time=8.637e-05, forward_time=0.290, loss_ctc=47.913, loss_att=47.300, acc=0.759, loss=47.484, backward_time=0.401, grad_norm=33.729, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.103e-04, train_time=1.521 +[gpub058:0/16] 2024-02-03 09:36:21,224 (trainer:737) INFO: 23epoch:train:9401-9500batch: iter_time=8.027e-05, forward_time=0.355, loss_ctc=49.887, loss_att=46.598, acc=0.755, loss=47.585, backward_time=0.420, grad_norm=35.244, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.102e-04, train_time=1.661 +[gpub058:0/16] 2024-02-03 09:38:43,584 (trainer:737) INFO: 23epoch:train:9501-9600batch: iter_time=3.822e-04, forward_time=0.334, loss_ctc=51.946, loss_att=44.156, acc=0.762, loss=46.493, backward_time=0.422, grad_norm=38.506, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.102e-04, train_time=1.423 +[gpub058:0/16] 2024-02-03 09:41:38,010 (trainer:737) INFO: 23epoch:train:9601-9700batch: 
iter_time=7.941e-05, forward_time=0.292, loss_ctc=51.311, loss_att=54.192, acc=0.743, loss=53.328, backward_time=0.405, grad_norm=38.033, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.102e-04, train_time=1.744 +[gpub058:0/16] 2024-02-03 09:44:11,050 (trainer:737) INFO: 23epoch:train:9701-9800batch: iter_time=4.824e-04, forward_time=0.410, loss_ctc=44.541, loss_att=45.440, acc=0.759, loss=45.170, backward_time=0.451, grad_norm=30.853, clip=100.000, loss_scale=2.934e+33, optim_step_time=0.096, optim0_lr0=2.101e-04, train_time=1.531 +[gpub058:0/16] 2024-02-03 09:46:36,083 (trainer:737) INFO: 23epoch:train:9801-9900batch: iter_time=7.666e-05, forward_time=0.290, loss_ctc=56.622, loss_att=56.446, acc=0.719, loss=56.499, backward_time=0.404, grad_norm=44.563, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.101e-04, train_time=1.450 +[gpub058:0/16] 2024-02-03 09:49:22,348 (trainer:737) INFO: 23epoch:train:9901-10000batch: iter_time=7.812e-05, forward_time=0.289, loss_ctc=44.462, loss_att=50.984, acc=0.730, loss=49.028, backward_time=0.401, grad_norm=39.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.101e-04, train_time=1.663 +[gpub058:0/16] 2024-02-03 09:49:42,377 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub058:0/16] 2024-02-03 09:50:01,362 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 09:50:04,969 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 09:50:04,969 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub058:0/16] 2024-02-03 09:50:04,972 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 09:57:17,786 (trainer:737) INFO: 23epoch:train:10001-10100batch: iter_time=3.153, forward_time=0.408, loss_ctc=45.568, loss_att=38.862, acc=0.763, loss=40.874, backward_time=0.426, grad_norm=36.967, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.100e-04, train_time=4.754 +[gpub058:0/16] 2024-02-03 09:59:54,603 (trainer:737) INFO: 23epoch:train:10101-10200batch: iter_time=8.011e-05, forward_time=0.287, loss_ctc=44.147, loss_att=48.039, acc=0.736, loss=46.871, backward_time=0.399, grad_norm=34.265, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.100e-04, train_time=1.567 +[gpub058:0/16] 2024-02-03 10:02:25,490 (trainer:737) INFO: 23epoch:train:10201-10300batch: iter_time=8.078e-05, forward_time=0.295, loss_ctc=51.456, loss_att=46.709, acc=0.753, loss=48.133, backward_time=0.403, grad_norm=42.803, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.100e-04, train_time=1.510 +[gpub058:0/16] 2024-02-03 10:05:02,051 (trainer:737) 
INFO: 23epoch:train:10301-10400batch: iter_time=8.244e-05, forward_time=0.358, loss_ctc=42.271, loss_att=48.269, acc=0.735, loss=46.469, backward_time=0.447, grad_norm=36.169, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.100e-04, train_time=1.565 +[gpub058:0/16] 2024-02-03 10:07:36,249 (trainer:737) INFO: 23epoch:train:10401-10500batch: iter_time=8.111e-05, forward_time=0.290, loss_ctc=55.145, loss_att=57.538, acc=0.727, loss=56.820, backward_time=0.403, grad_norm=62.494, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.099e-04, train_time=1.541 +[gpub058:0/16] 2024-02-03 10:10:18,653 (trainer:737) INFO: 23epoch:train:10501-10600batch: iter_time=8.538e-05, forward_time=0.301, loss_ctc=51.409, loss_att=47.329, acc=0.752, loss=48.553, backward_time=0.415, grad_norm=34.064, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.099e-04, train_time=1.623 +[gpub058:0/16] 2024-02-03 10:12:47,095 (trainer:737) INFO: 23epoch:train:10601-10700batch: iter_time=7.756e-04, forward_time=0.392, loss_ctc=49.673, loss_att=50.097, acc=0.741, loss=49.970, backward_time=0.418, grad_norm=36.125, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.099e-04, train_time=1.486 +[gpub058:0/16] 2024-02-03 10:15:32,174 (trainer:737) INFO: 23epoch:train:10701-10800batch: iter_time=8.213e-05, forward_time=0.288, loss_ctc=47.416, loss_att=41.274, acc=0.764, loss=43.117, backward_time=0.399, grad_norm=34.363, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.098e-04, train_time=1.650 +[gpub058:0/16] 2024-02-03 10:17:51,955 (trainer:737) INFO: 23epoch:train:10801-10900batch: iter_time=8.310e-05, forward_time=0.291, loss_ctc=52.572, loss_att=44.135, acc=0.758, loss=46.666, backward_time=0.409, grad_norm=40.305, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.098e-04, train_time=1.398 +[gpub058:0/16] 2024-02-03 10:20:38,653 (trainer:737) INFO: 
23epoch:train:10901-11000batch: iter_time=4.566e-04, forward_time=0.366, loss_ctc=48.690, loss_att=53.107, acc=0.749, loss=51.782, backward_time=0.458, grad_norm=34.499, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.098e-04, train_time=1.667 +[gpub058:0/16] 2024-02-03 10:23:12,935 (trainer:737) INFO: 23epoch:train:11001-11100batch: iter_time=8.714e-05, forward_time=0.288, loss_ctc=46.975, loss_att=46.264, acc=0.741, loss=46.478, backward_time=0.403, grad_norm=36.505, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.097e-04, train_time=1.543 +[gpub058:0/16] 2024-02-03 10:25:54,383 (trainer:737) INFO: 23epoch:train:11101-11200batch: iter_time=8.212e-05, forward_time=0.310, loss_ctc=58.572, loss_att=62.046, acc=0.708, loss=61.004, backward_time=0.405, grad_norm=47.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.097e-04, train_time=1.614 +[gpub058:0/16] 2024-02-03 10:27:23,107 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub058:0/16] 2024-02-03 10:27:42,187 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 10:27:45,735 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 10:27:45,735 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub058:0/16] 2024-02-03 10:27:45,738 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 10:33:23,084 (trainer:737) INFO: 23epoch:train:11201-11300batch: iter_time=3.056, forward_time=0.374, loss_ctc=42.346, loss_att=36.715, acc=0.763, loss=38.404, backward_time=0.425, grad_norm=36.788, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.097e-04, train_time=4.487 +[gpub058:0/16] 2024-02-03 10:35:46,878 (trainer:737) INFO: 23epoch:train:11301-11400batch: iter_time=8.346e-05, forward_time=0.290, loss_ctc=44.158, loss_att=41.855, acc=0.742, loss=42.546, backward_time=0.399, grad_norm=36.185, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.096e-04, train_time=1.438 +[gpub058:0/16] 2024-02-03 10:38:33,758 (trainer:737) INFO: 23epoch:train:11401-11500batch: iter_time=8.306e-05, forward_time=0.309, loss_ctc=52.388, loss_att=50.375, acc=0.748, loss=50.979, backward_time=0.428, grad_norm=41.941, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.096e-04, train_time=1.668 +[gpub058:0/16] 2024-02-03 10:40:51,068 
(trainer:737) INFO: 23epoch:train:11501-11600batch: iter_time=6.465e-04, forward_time=0.334, loss_ctc=44.619, loss_att=45.560, acc=0.748, loss=45.277, backward_time=0.432, grad_norm=33.507, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.096e-04, train_time=1.373 +[gpub058:0/16] 2024-02-03 10:43:33,348 (trainer:737) INFO: 23epoch:train:11601-11700batch: iter_time=8.422e-05, forward_time=0.288, loss_ctc=45.173, loss_att=48.822, acc=0.729, loss=47.727, backward_time=0.401, grad_norm=37.036, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.096e-04, train_time=1.622 +[gpub058:0/16] 2024-02-03 10:46:04,201 (trainer:737) INFO: 23epoch:train:11701-11800batch: iter_time=8.409e-05, forward_time=0.328, loss_ctc=56.321, loss_att=54.655, acc=0.742, loss=55.155, backward_time=0.422, grad_norm=42.758, clip=100.000, loss_scale=5.867e+33, optim_step_time=0.095, optim0_lr0=2.095e-04, train_time=1.508 +[gpub058:0/16] 2024-02-03 10:48:38,884 (trainer:737) INFO: 23epoch:train:11801-11900batch: iter_time=2.896e-04, forward_time=0.350, loss_ctc=47.887, loss_att=45.556, acc=0.752, loss=46.256, backward_time=0.417, grad_norm=32.197, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.095e-04, train_time=1.547 +[gpub058:0/16] 2024-02-03 10:51:07,103 (trainer:737) INFO: 23epoch:train:11901-12000batch: iter_time=8.832e-05, forward_time=0.289, loss_ctc=49.604, loss_att=46.414, acc=0.752, loss=47.371, backward_time=0.402, grad_norm=35.506, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.095e-04, train_time=1.482 +[gpub058:0/16] 2024-02-03 10:53:55,876 (trainer:737) INFO: 23epoch:train:12001-12100batch: iter_time=8.670e-05, forward_time=0.337, loss_ctc=51.387, loss_att=43.974, acc=0.766, loss=46.198, backward_time=0.414, grad_norm=36.073, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.094e-04, train_time=1.688 +[gpub058:0/16] 2024-02-03 10:56:48,302 (trainer:737) INFO: 
23epoch:train:12101-12200batch: iter_time=6.227e-04, forward_time=0.362, loss_ctc=51.948, loss_att=52.246, acc=0.739, loss=52.156, backward_time=0.423, grad_norm=39.474, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.094e-04, train_time=1.723 +[gpub058:0/16] 2024-02-03 10:59:05,607 (trainer:737) INFO: 23epoch:train:12201-12300batch: iter_time=8.387e-05, forward_time=0.290, loss_ctc=44.691, loss_att=45.443, acc=0.756, loss=45.218, backward_time=0.403, grad_norm=33.582, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.094e-04, train_time=1.374 +[gpub058:0/16] 2024-02-03 11:01:52,314 (trainer:737) INFO: 23epoch:train:12301-12400batch: iter_time=9.694e-05, forward_time=0.341, loss_ctc=56.691, loss_att=55.138, acc=0.718, loss=55.604, backward_time=0.422, grad_norm=44.931, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.093e-04, train_time=1.667 +[gpub058:0/16] 2024-02-03 11:04:27,903 (trainer:737) INFO: 23epoch:train:12401-12500batch: iter_time=2.313e-04, forward_time=0.355, loss_ctc=44.426, loss_att=49.678, acc=0.725, loss=48.102, backward_time=0.433, grad_norm=37.754, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.093e-04, train_time=1.555 +[gpub058:0/16] 2024-02-03 11:04:47,931 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub058:0/16] 2024-02-03 11:05:06,758 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 11:05:10,333 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 11:05:10,333 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub058:0/16] 2024-02-03 11:05:10,336 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 11:12:34,049 (trainer:737) INFO: 23epoch:train:12501-12600batch: iter_time=3.259, forward_time=0.318, loss_ctc=45.326, loss_att=38.783, acc=0.773, loss=40.746, backward_time=0.402, grad_norm=35.838, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.093e-04, train_time=4.861 +[gpub058:0/16] 2024-02-03 11:15:15,888 (trainer:737) INFO: 23epoch:train:12601-12700batch: iter_time=7.736e-05, forward_time=0.357, loss_ctc=43.673, loss_att=50.354, acc=0.740, loss=48.349, backward_time=0.416, grad_norm=33.882, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.092e-04, train_time=1.618 +[gpub058:0/16] 2024-02-03 11:17:36,737 (trainer:737) INFO: 23epoch:train:12701-12800batch: iter_time=7.794e-05, forward_time=0.290, loss_ctc=51.037, loss_att=46.430, acc=0.764, loss=47.812, backward_time=0.405, grad_norm=41.213, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.092e-04, train_time=1.406 +[gpub058:0/16] 2024-02-03 11:20:22,577 (trainer:737) 
INFO: 23epoch:train:12801-12900batch: iter_time=8.277e-05, forward_time=0.290, loss_ctc=42.590, loss_att=47.885, acc=0.738, loss=46.297, backward_time=0.418, grad_norm=36.544, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.092e-04, train_time=1.660 +[gpub058:0/16] 2024-02-03 11:23:11,746 (trainer:737) INFO: 23epoch:train:12901-13000batch: iter_time=8.914e-05, forward_time=0.363, loss_ctc=55.164, loss_att=58.613, acc=0.737, loss=57.578, backward_time=0.426, grad_norm=46.901, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.092e-04, train_time=1.691 +[gpub058:0/16] 2024-02-03 11:25:33,276 (trainer:737) INFO: 23epoch:train:13001-13100batch: iter_time=7.977e-05, forward_time=0.292, loss_ctc=51.294, loss_att=47.229, acc=0.768, loss=48.448, backward_time=0.406, grad_norm=34.267, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.091e-04, train_time=1.415 +[gpub058:0/16] 2024-02-03 11:28:18,396 (trainer:737) INFO: 23epoch:train:13101-13200batch: iter_time=8.154e-05, forward_time=0.307, loss_ctc=49.462, loss_att=51.969, acc=0.745, loss=51.217, backward_time=0.414, grad_norm=34.307, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.091e-04, train_time=1.650 +[gpub058:0/16] 2024-02-03 11:30:39,509 (trainer:737) INFO: 23epoch:train:13201-13300batch: iter_time=7.805e-05, forward_time=0.373, loss_ctc=47.383, loss_att=41.217, acc=0.767, loss=43.066, backward_time=0.423, grad_norm=35.651, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.091e-04, train_time=1.412 +[gpub058:0/16] 2024-02-03 11:33:19,472 (trainer:737) INFO: 23epoch:train:13301-13400batch: iter_time=8.542e-05, forward_time=0.290, loss_ctc=51.638, loss_att=46.363, acc=0.754, loss=47.946, backward_time=0.403, grad_norm=40.452, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.090e-04, train_time=1.599 +[gpub058:0/16] 2024-02-03 11:35:59,003 (trainer:737) INFO: 
23epoch:train:13401-13500batch: iter_time=8.183e-05, forward_time=0.299, loss_ctc=48.436, loss_att=52.703, acc=0.755, loss=51.423, backward_time=0.430, grad_norm=33.872, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.090e-04, train_time=1.594 +[gpub058:0/16] 2024-02-03 11:38:50,058 (trainer:737) INFO: 23epoch:train:13501-13600batch: iter_time=9.657e-05, forward_time=0.363, loss_ctc=46.525, loss_att=46.259, acc=0.744, loss=46.339, backward_time=0.433, grad_norm=34.992, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.090e-04, train_time=1.711 +[gpub058:0/16] 2024-02-03 11:41:22,234 (trainer:737) INFO: 23epoch:train:13601-13700batch: iter_time=8.477e-05, forward_time=0.293, loss_ctc=58.241, loss_att=64.049, acc=0.708, loss=62.307, backward_time=0.407, grad_norm=47.642, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.089e-04, train_time=1.521 +[gpub058:0/16] 2024-02-03 11:43:21,883 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub058:0/16] 2024-02-03 11:43:41,327 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 11:43:44,922 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 11:43:44,922 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-03 11:43:44,925 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 11:50:09,900 (trainer:737) INFO: 23epoch:train:13701-13800batch: iter_time=3.315, forward_time=0.420, loss_ctc=42.088, loss_att=36.631, acc=0.765, loss=38.268, backward_time=0.425, grad_norm=36.108, clip=100.000, loss_scale=1.173e+34, optim_step_time=0.097, optim0_lr0=2.089e-04, train_time=5.277 +[gpub058:0/16] 2024-02-03 11:51:01,477 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-03 11:52:27,473 (trainer:737) INFO: 23epoch:train:13801-13900batch: iter_time=8.364e-05, forward_time=0.287, loss_ctc=43.830, loss_att=43.220, acc=0.738, loss=43.403, backward_time=0.399, grad_norm=37.771, clip=100.000, loss_scale=1.416e+34, optim_step_time=0.092, optim0_lr0=2.089e-04, train_time=1.376 +[gpub058:0/16] 2024-02-03 11:55:14,769 (trainer:737) INFO: 23epoch:train:13901-14000batch: iter_time=8.066e-05, forward_time=0.395, loss_ctc=53.037, loss_att=51.184, acc=0.749, loss=51.740, backward_time=0.438, grad_norm=41.141, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=2.088e-04, train_time=1.673 +[gpub058:0/16] 2024-02-03 11:57:16,265 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 11:57:52,156 (trainer:737) INFO: 23epoch:train:14001-14100batch: iter_time=8.057e-05, forward_time=0.310, loss_ctc=44.564, loss_att=45.975, acc=0.747, loss=45.552, backward_time=0.401, grad_norm=35.049, clip=100.000, loss_scale=9.021e+33, optim_step_time=0.092, optim0_lr0=2.088e-04, train_time=1.574 +[gpub058:0/16] 2024-02-03 12:00:24,886 (trainer:737) INFO: 23epoch:train:14101-14200batch: iter_time=8.338e-05, forward_time=0.289, loss_ctc=45.513, loss_att=48.550, acc=0.733, loss=47.639, backward_time=0.401, grad_norm=38.050, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.088e-04, train_time=1.527 +[gpub058:0/16] 2024-02-03 12:02:59,241 (trainer:737) INFO: 23epoch:train:14201-14300batch: iter_time=9.038e-05, forward_time=0.398, loss_ctc=55.457, loss_att=54.539, acc=0.741, loss=54.814, backward_time=0.434, grad_norm=45.526, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.088e-04, train_time=1.543 +[gpub058:0/16] 2024-02-03 12:05:28,359 (trainer:737) INFO: 23epoch:train:14301-14400batch: iter_time=8.927e-05, forward_time=0.289, loss_ctc=47.540, loss_att=45.726, acc=0.752, loss=46.270, backward_time=0.402, grad_norm=33.313, clip=100.000, 
loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.087e-04, train_time=1.491 +[gpub058:0/16] 2024-02-03 12:07:56,782 (trainer:737) INFO: 23epoch:train:14401-14500batch: iter_time=8.405e-05, forward_time=0.290, loss_ctc=50.001, loss_att=46.576, acc=0.750, loss=47.604, backward_time=0.404, grad_norm=36.670, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.087e-04, train_time=1.484 +[gpub058:0/16] 2024-02-03 12:10:35,246 (trainer:737) INFO: 23epoch:train:14501-14600batch: iter_time=8.240e-05, forward_time=0.381, loss_ctc=51.641, loss_att=43.497, acc=0.767, loss=45.941, backward_time=0.450, grad_norm=38.766, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=2.087e-04, train_time=1.584 +[gpub058:0/16] 2024-02-03 12:13:03,873 (trainer:737) INFO: 23epoch:train:14601-14700batch: iter_time=8.236e-05, forward_time=0.291, loss_ctc=51.524, loss_att=51.898, acc=0.739, loss=51.786, backward_time=0.405, grad_norm=38.737, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.086e-04, train_time=1.486 +[gpub058:0/16] 2024-02-03 12:15:42,677 (trainer:737) INFO: 23epoch:train:14701-14800batch: iter_time=2.236e-04, forward_time=0.290, loss_ctc=44.382, loss_att=45.147, acc=0.755, loss=44.917, backward_time=0.404, grad_norm=32.411, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.086e-04, train_time=1.588 +[gpub058:0/16] 2024-02-03 12:18:13,143 (trainer:737) INFO: 23epoch:train:14801-14900batch: iter_time=8.564e-05, forward_time=0.376, loss_ctc=56.018, loss_att=54.883, acc=0.721, loss=55.223, backward_time=0.452, grad_norm=44.005, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.086e-04, train_time=1.504 +[gpub058:0/16] 2024-02-03 12:20:38,165 (trainer:737) INFO: 23epoch:train:14901-15000batch: iter_time=8.003e-05, forward_time=0.289, loss_ctc=43.824, loss_att=49.462, acc=0.726, loss=47.771, backward_time=0.402, grad_norm=37.833, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.092, optim0_lr0=2.085e-04, train_time=1.450 +[gpub058:0/16] 2024-02-03 12:58:54,819 (trainer:343) INFO: 23epoch results: [train] iter_time=0.246, forward_time=0.329, loss_ctc=49.724, loss_att=48.692, acc=0.745, loss=49.002, backward_time=0.419, grad_norm=38.258, clip=100.000, loss_scale=6.231e+33, optim_step_time=0.095, optim0_lr0=2.108e-04, train_time=1.788, time=7 hours, 27 minutes and 32.77 seconds, total_count=375000, gpu_max_cached_mem_GB=43.281, [valid] loss_ctc=42.465, cer_ctc=0.207, loss_att=43.033, acc=0.664, cer=0.341, wer=0.993, loss=42.862, time=37 minutes and 52.7 seconds, total_count=116775, gpu_max_cached_mem_GB=43.281 +[gpub058:0/16] 2024-02-03 12:59:04,853 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub058:0/16] 2024-02-03 12:59:04,876 (trainer:272) INFO: 24/45epoch started. Estimated time to finish: 1 week, 12 hours and 46 minutes +[gpub058:0/16] 2024-02-03 12:59:04,889 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub058:0/16] 2024-02-03 12:59:23,667 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 12:59:27,402 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 12:59:27,402 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub058:0/16] 2024-02-03 12:59:27,405 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 13:06:15,352 (trainer:737) INFO: 24epoch:train:1-100batch: iter_time=2.845, forward_time=0.352, loss_ctc=50.175, loss_att=50.524, acc=0.738, loss=50.419, backward_time=0.413, grad_norm=37.072, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.085e-04, train_time=4.304 +[gpub058:0/16] 2024-02-03 13:08:50,403 (trainer:737) INFO: 24epoch:train:101-200batch: iter_time=2.293e-04, forward_time=0.355, loss_ctc=47.259, loss_att=43.000, acc=0.753, loss=44.278, backward_time=0.444, grad_norm=35.270, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.085e-04, train_time=1.550 +[gpub058:0/16] 2024-02-03 13:11:33,501 (trainer:737) INFO: 24epoch:train:201-300batch: iter_time=8.119e-05, forward_time=0.352, loss_ctc=51.331, loss_att=50.891, acc=0.735, loss=51.023, backward_time=0.420, grad_norm=39.278, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.085e-04, train_time=1.631 +[gpub058:0/16] 2024-02-03 13:14:21,069 (trainer:737) INFO: 
24epoch:train:301-400batch: iter_time=8.229e-05, forward_time=0.343, loss_ctc=46.483, loss_att=46.575, acc=0.729, loss=46.547, backward_time=0.438, grad_norm=35.024, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.084e-04, train_time=1.675 +[gpub058:0/16] 2024-02-03 13:17:16,240 (trainer:737) INFO: 24epoch:train:401-500batch: iter_time=4.872e-04, forward_time=0.346, loss_ctc=49.660, loss_att=55.072, acc=0.733, loss=53.449, backward_time=0.437, grad_norm=37.110, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.084e-04, train_time=1.751 +[gpub058:0/16] 2024-02-03 13:19:53,818 (trainer:737) INFO: 24epoch:train:501-600batch: iter_time=8.355e-05, forward_time=0.372, loss_ctc=46.149, loss_att=44.927, acc=0.740, loss=45.293, backward_time=0.471, grad_norm=36.599, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.084e-04, train_time=1.576 +[gpub058:0/16] 2024-02-03 13:22:44,071 (trainer:737) INFO: 24epoch:train:601-700batch: iter_time=8.395e-05, forward_time=0.340, loss_ctc=45.721, loss_att=49.090, acc=0.734, loss=48.079, backward_time=0.426, grad_norm=35.211, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.083e-04, train_time=1.701 +[gpub058:0/16] 2024-02-03 13:25:09,614 (trainer:737) INFO: 24epoch:train:701-800batch: iter_time=2.257e-04, forward_time=0.320, loss_ctc=48.389, loss_att=47.318, acc=0.738, loss=47.639, backward_time=0.413, grad_norm=98.350, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.083e-04, train_time=1.456 +[gpub058:0/16] 2024-02-03 13:28:05,634 (trainer:737) INFO: 24epoch:train:801-900batch: iter_time=4.741e-04, forward_time=0.401, loss_ctc=51.690, loss_att=54.504, acc=0.713, loss=53.660, backward_time=0.437, grad_norm=39.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.083e-04, train_time=1.759 +[gpub058:0/16] 2024-02-03 13:31:04,193 (trainer:737) INFO: 24epoch:train:901-1000batch: iter_time=4.756e-04, 
forward_time=0.387, loss_ctc=57.463, loss_att=52.060, acc=0.732, loss=53.681, backward_time=0.439, grad_norm=51.901, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.082e-04, train_time=1.787 +[gpub058:0/16] 2024-02-03 13:33:42,963 (trainer:737) INFO: 24epoch:train:1001-1100batch: iter_time=8.326e-05, forward_time=0.366, loss_ctc=50.373, loss_att=52.115, acc=0.744, loss=51.593, backward_time=0.456, grad_norm=36.286, clip=100.000, loss_scale=6.542e+33, optim_step_time=0.100, optim0_lr0=2.082e-04, train_time=1.587 +[gpub058:0/16] 2024-02-03 13:36:32,272 (trainer:737) INFO: 24epoch:train:1101-1200batch: iter_time=4.115e-04, forward_time=0.327, loss_ctc=52.085, loss_att=42.168, acc=0.764, loss=45.143, backward_time=0.413, grad_norm=40.559, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.082e-04, train_time=1.694 +[gpub058:0/16] 2024-02-03 13:38:16,364 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub058:0/16] 2024-02-03 13:38:35,295 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 13:38:38,945 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 13:38:38,945 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub058:0/16] 2024-02-03 13:38:38,948 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 13:44:44,299 (trainer:737) INFO: 
24epoch:train:1201-1300batch: iter_time=3.260, forward_time=0.376, loss_ctc=57.261, loss_att=62.858, acc=0.714, loss=61.179, backward_time=0.437, grad_norm=42.682, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.082e-04, train_time=4.920 +[gpub058:0/16] 2024-02-03 13:47:24,705 (trainer:737) INFO: 24epoch:train:1301-1400batch: iter_time=8.574e-05, forward_time=0.333, loss_ctc=46.474, loss_att=42.675, acc=0.753, loss=43.815, backward_time=0.409, grad_norm=38.063, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.081e-04, train_time=1.604 +[gpub058:0/16] 2024-02-03 13:50:07,566 (trainer:737) INFO: 24epoch:train:1401-1500batch: iter_time=8.603e-05, forward_time=0.348, loss_ctc=50.702, loss_att=55.884, acc=0.742, loss=54.329, backward_time=0.438, grad_norm=39.573, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.081e-04, train_time=1.628 +[gpub058:0/16] 2024-02-03 13:52:36,198 (trainer:737) INFO: 24epoch:train:1501-1600batch: iter_time=9.179e-05, forward_time=0.290, loss_ctc=45.503, loss_att=42.781, acc=0.755, loss=43.597, backward_time=0.403, grad_norm=36.199, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.081e-04, train_time=1.486 +[gpub058:0/16] 2024-02-03 13:54:47,270 (trainer:737) INFO: 24epoch:train:1601-1700batch: iter_time=9.484e-05, forward_time=0.291, loss_ctc=48.482, loss_att=53.460, acc=0.729, loss=51.967, backward_time=0.412, grad_norm=34.934, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.080e-04, train_time=1.311 +[gpub058:0/16] 2024-02-03 13:57:30,504 (trainer:737) INFO: 24epoch:train:1701-1800batch: iter_time=9.309e-05, forward_time=0.365, loss_ctc=45.546, loss_att=48.838, acc=0.759, loss=47.850, backward_time=0.458, grad_norm=35.985, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.080e-04, train_time=1.632 +[gpub058:0/16] 2024-02-03 14:00:09,245 (trainer:737) INFO: 24epoch:train:1801-1900batch: 
iter_time=9.402e-05, forward_time=0.288, loss_ctc=43.166, loss_att=38.133, acc=0.754, loss=39.643, backward_time=0.400, grad_norm=33.451, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.080e-04, train_time=1.587 +[gpub058:0/16] 2024-02-03 14:02:23,503 (trainer:737) INFO: 24epoch:train:1901-2000batch: iter_time=9.437e-05, forward_time=0.302, loss_ctc=50.123, loss_att=59.077, acc=0.723, loss=56.391, backward_time=0.408, grad_norm=36.323, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.079e-04, train_time=1.343 +[gpub058:0/16] 2024-02-03 14:05:17,867 (trainer:737) INFO: 24epoch:train:2001-2100batch: iter_time=9.292e-05, forward_time=0.329, loss_ctc=45.664, loss_att=48.663, acc=0.737, loss=47.763, backward_time=0.437, grad_norm=38.871, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.079e-04, train_time=1.744 +[gpub058:0/16] 2024-02-03 14:05:58,174 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-03 14:07:34,427 (trainer:737) INFO: 24epoch:train:2101-2200batch: iter_time=9.150e-05, forward_time=0.330, loss_ctc=51.021, loss_att=51.510, acc=0.738, loss=51.363, backward_time=0.408, grad_norm=37.640, clip=100.000, loss_scale=6.661e+33, optim_step_time=0.092, optim0_lr0=2.079e-04, train_time=1.365 +[gpub058:0/16] 2024-02-03 14:09:54,996 (trainer:737) INFO: 24epoch:train:2201-2300batch: iter_time=8.379e-05, forward_time=0.295, loss_ctc=59.173, loss_att=57.892, acc=0.753, loss=58.277, backward_time=0.408, grad_norm=52.597, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.079e-04, train_time=1.405 +[gpub058:0/16] 2024-02-03 14:12:47,009 (trainer:737) INFO: 24epoch:train:2301-2400batch: iter_time=8.676e-05, forward_time=0.353, loss_ctc=52.187, loss_att=49.489, acc=0.763, loss=50.298, backward_time=0.422, grad_norm=38.302, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.078e-04, train_time=1.720 +[gpub058:0/16] 2024-02-03 14:15:07,047 (trainer:737) INFO: 24epoch:train:2401-2500batch: iter_time=8.537e-05, forward_time=0.302, loss_ctc=53.081, loss_att=53.724, acc=0.721, loss=53.531, backward_time=0.411, grad_norm=39.582, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.078e-04, train_time=1.400 +[gpub058:0/16] 2024-02-03 14:15:27,074 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub058:0/16] 2024-02-03 14:15:46,137 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 14:15:49,656 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 14:15:49,657 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub058:0/16] 2024-02-03 14:15:49,660 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 14:23:20,637 (trainer:737) INFO: 24epoch:train:2501-2600batch: iter_time=3.216, forward_time=0.303, loss_ctc=47.587, loss_att=49.463, acc=0.740, loss=48.900, backward_time=0.406, grad_norm=38.770, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.078e-04, train_time=4.935 +[gpub058:0/16] 2024-02-03 14:26:00,165 (trainer:737) INFO: 24epoch:train:2601-2700batch: iter_time=0.045, forward_time=0.372, loss_ctc=46.159, loss_att=42.381, acc=0.759, loss=43.514, backward_time=0.420, grad_norm=35.776, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.077e-04, train_time=1.596 +[gpub058:0/16] 2024-02-03 14:29:44,262 (trainer:737) INFO: 24epoch:train:2701-2800batch: iter_time=0.036, forward_time=0.312, loss_ctc=50.341, loss_att=52.306, acc=0.734, loss=51.717, backward_time=0.457, grad_norm=39.222, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.077e-04, train_time=2.241 +[gpub058:0/16] 2024-02-03 14:32:14,257 (trainer:737) INFO: 
24epoch:train:2801-2900batch: iter_time=8.050e-05, forward_time=0.300, loss_ctc=45.951, loss_att=46.445, acc=0.732, loss=46.296, backward_time=0.405, grad_norm=36.631, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.077e-04, train_time=1.500 +[gpub058:0/16] 2024-02-03 14:34:32,025 (trainer:737) INFO: 24epoch:train:2901-3000batch: iter_time=8.631e-05, forward_time=0.296, loss_ctc=48.234, loss_att=53.824, acc=0.737, loss=52.147, backward_time=0.409, grad_norm=36.370, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.076e-04, train_time=1.363 +[gpub058:0/16] 2024-02-03 14:38:44,483 (trainer:737) INFO: 24epoch:train:3001-3100batch: iter_time=0.006, forward_time=0.383, loss_ctc=44.962, loss_att=43.934, acc=0.746, loss=44.243, backward_time=0.432, grad_norm=34.495, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.076e-04, train_time=2.538 +[gpub058:0/16] 2024-02-03 14:41:20,494 (trainer:737) INFO: 24epoch:train:3101-3200batch: iter_time=8.701e-05, forward_time=0.306, loss_ctc=44.784, loss_att=48.762, acc=0.736, loss=47.569, backward_time=0.406, grad_norm=34.597, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.076e-04, train_time=1.560 +[gpub058:0/16] 2024-02-03 14:43:34,182 (trainer:737) INFO: 24epoch:train:3201-3300batch: iter_time=8.098e-05, forward_time=0.291, loss_ctc=47.222, loss_att=46.554, acc=0.742, loss=46.755, backward_time=0.418, grad_norm=36.943, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.076e-04, train_time=1.337 +[gpub058:0/16] 2024-02-03 14:47:28,620 (trainer:737) INFO: 24epoch:train:3301-3400batch: iter_time=0.091, forward_time=0.388, loss_ctc=50.979, loss_att=54.705, acc=0.713, loss=53.587, backward_time=0.431, grad_norm=40.560, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.075e-04, train_time=2.343 +[gpub058:0/16] 2024-02-03 14:49:56,127 (trainer:737) INFO: 24epoch:train:3401-3500batch: iter_time=8.377e-05, 
forward_time=0.307, loss_ctc=54.501, loss_att=51.812, acc=0.737, loss=52.618, backward_time=0.408, grad_norm=49.651, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.075e-04, train_time=1.476 +[gpub058:0/16] 2024-02-03 14:52:35,300 (trainer:737) INFO: 24epoch:train:3501-3600batch: iter_time=8.381e-05, forward_time=0.291, loss_ctc=49.440, loss_att=51.478, acc=0.747, loss=50.867, backward_time=0.405, grad_norm=36.943, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.075e-04, train_time=1.592 +[gpub058:0/16] 2024-02-03 14:54:54,607 (trainer:737) INFO: 24epoch:train:3601-3700batch: iter_time=8.204e-05, forward_time=0.356, loss_ctc=50.651, loss_att=42.628, acc=0.762, loss=45.035, backward_time=0.425, grad_norm=40.204, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.074e-04, train_time=1.393 +[gpub058:0/16] 2024-02-03 14:56:25,836 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub058:0/16] 2024-02-03 14:56:44,585 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 14:56:48,223 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 14:56:48,223 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub058:0/16] 2024-02-03 14:56:48,226 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 15:02:47,187 (trainer:737) INFO: 
24epoch:train:3701-3800batch: iter_time=3.255, forward_time=0.325, loss_ctc=55.596, loss_att=61.837, acc=0.717, loss=59.964, backward_time=0.423, grad_norm=39.471, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.074e-04, train_time=4.725 +[gpub058:0/16] 2024-02-03 15:05:29,084 (trainer:737) INFO: 24epoch:train:3801-3900batch: iter_time=7.637e-05, forward_time=0.294, loss_ctc=45.553, loss_att=42.102, acc=0.757, loss=43.137, backward_time=0.406, grad_norm=36.936, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.074e-04, train_time=1.619 +[gpub058:0/16] 2024-02-03 15:07:54,202 (trainer:737) INFO: 24epoch:train:3901-4000batch: iter_time=7.853e-05, forward_time=0.365, loss_ctc=50.291, loss_att=54.840, acc=0.743, loss=53.475, backward_time=0.423, grad_norm=39.109, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.073e-04, train_time=1.451 +[gpub058:0/16] 2024-02-03 15:10:38,588 (trainer:737) INFO: 24epoch:train:4001-4100batch: iter_time=8.046e-05, forward_time=0.298, loss_ctc=44.587, loss_att=41.997, acc=0.758, loss=42.774, backward_time=0.410, grad_norm=33.212, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.073e-04, train_time=1.644 +[gpub058:0/16] 2024-02-03 15:12:51,237 (trainer:737) INFO: 24epoch:train:4101-4200batch: iter_time=8.190e-05, forward_time=0.308, loss_ctc=47.749, loss_att=52.378, acc=0.734, loss=50.990, backward_time=0.414, grad_norm=34.213, clip=100.000, loss_scale=8.879e+33, optim_step_time=0.092, optim0_lr0=2.073e-04, train_time=1.326 +[gpub058:0/16] 2024-02-03 15:15:38,389 (trainer:737) INFO: 24epoch:train:4201-4300batch: iter_time=8.299e-05, forward_time=0.346, loss_ctc=44.791, loss_att=48.640, acc=0.763, loss=47.485, backward_time=0.419, grad_norm=34.478, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.073e-04, train_time=1.671 +[gpub058:0/16] 2024-02-03 15:17:56,100 (trainer:737) INFO: 24epoch:train:4301-4400batch: 
iter_time=8.707e-05, forward_time=0.325, loss_ctc=42.888, loss_att=38.008, acc=0.757, loss=39.472, backward_time=0.411, grad_norm=32.218, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.072e-04, train_time=1.377 +[gpub058:0/16] 2024-02-03 15:20:18,278 (trainer:737) INFO: 24epoch:train:4401-4500batch: iter_time=8.514e-05, forward_time=0.292, loss_ctc=49.065, loss_att=58.280, acc=0.724, loss=55.516, backward_time=0.406, grad_norm=36.560, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.072e-04, train_time=1.421 +[gpub058:0/16] 2024-02-03 15:23:08,912 (trainer:737) INFO: 24epoch:train:4501-4600batch: iter_time=8.621e-05, forward_time=0.361, loss_ctc=45.215, loss_att=48.474, acc=0.739, loss=47.496, backward_time=0.428, grad_norm=36.174, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.072e-04, train_time=1.706 +[gpub058:0/16] 2024-02-03 15:25:26,753 (trainer:737) INFO: 24epoch:train:4601-4700batch: iter_time=8.379e-05, forward_time=0.313, loss_ctc=50.250, loss_att=51.259, acc=0.739, loss=50.957, backward_time=0.413, grad_norm=36.980, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.071e-04, train_time=1.378 +[gpub058:0/16] 2024-02-03 15:28:20,692 (trainer:737) INFO: 24epoch:train:4701-4800batch: iter_time=8.484e-05, forward_time=0.407, loss_ctc=55.291, loss_att=53.415, acc=0.755, loss=53.978, backward_time=0.434, grad_norm=44.553, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.071e-04, train_time=1.739 +[gpub058:0/16] 2024-02-03 15:30:45,513 (trainer:737) INFO: 24epoch:train:4801-4900batch: iter_time=8.562e-05, forward_time=0.297, loss_ctc=51.355, loss_att=49.199, acc=0.765, loss=49.845, backward_time=0.407, grad_norm=37.482, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.071e-04, train_time=1.448 +[gpub058:0/16] 2024-02-03 15:33:25,828 (trainer:737) INFO: 24epoch:train:4901-5000batch: iter_time=7.854e-05, forward_time=0.380, 
loss_ctc=52.553, loss_att=53.140, acc=0.723, loss=52.964, backward_time=0.425, grad_norm=40.339, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.071e-04, train_time=1.603 +[gpub058:0/16] 2024-02-03 15:33:45,856 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub058:0/16] 2024-02-03 15:34:04,968 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 15:34:08,559 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 15:34:08,559 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub058:0/16] 2024-02-03 15:34:08,563 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 15:41:06,856 (trainer:737) INFO: 24epoch:train:5001-5100batch: iter_time=3.142, forward_time=0.319, loss_ctc=47.400, loss_att=47.230, acc=0.757, loss=47.281, backward_time=0.409, grad_norm=35.108, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.070e-04, train_time=4.610 +[gpub058:0/16] 2024-02-03 15:43:56,807 (trainer:737) INFO: 24epoch:train:5101-5200batch: iter_time=8.053e-05, forward_time=0.396, loss_ctc=45.610, loss_att=42.886, acc=0.756, loss=43.703, backward_time=0.421, grad_norm=33.760, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=2.070e-04, train_time=1.700 +[gpub058:0/16] 2024-02-03 15:44:11,914 (trainer:668) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub058:0/16] 2024-02-03 15:46:22,350 (trainer:737) INFO: 24epoch:train:5201-5300batch: iter_time=8.942e-05, forward_time=0.384, loss_ctc=49.333, loss_att=52.534, acc=0.747, loss=51.574, backward_time=0.421, grad_norm=36.616, clip=100.000, loss_scale=5.717e+33, optim_step_time=0.096, optim0_lr0=2.070e-04, train_time=1.455 +[gpub058:0/16] 2024-02-03 15:49:19,032 (trainer:737) INFO: 24epoch:train:5301-5400batch: iter_time=8.662e-05, forward_time=0.381, loss_ctc=44.905, loss_att=46.824, acc=0.744, loss=46.248, backward_time=0.421, grad_norm=35.013, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.069e-04, train_time=1.767 +[gpub058:0/16] 2024-02-03 15:51:35,970 (trainer:737) INFO: 24epoch:train:5401-5500batch: iter_time=2.314e-04, forward_time=0.306, loss_ctc=47.340, loss_att=52.180, acc=0.751, loss=50.728, backward_time=0.411, grad_norm=33.837, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.069e-04, train_time=1.369 +[gpub058:0/16] 2024-02-03 15:54:06,741 (trainer:737) INFO: 24epoch:train:5501-5600batch: iter_time=7.996e-05, forward_time=0.412, loss_ctc=44.801, loss_att=43.844, acc=0.754, loss=44.131, backward_time=0.444, grad_norm=35.050, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.069e-04, train_time=1.508 +[gpub058:0/16] 2024-02-03 15:56:53,082 (trainer:737) INFO: 24epoch:train:5601-5700batch: iter_time=8.414e-05, forward_time=0.297, loss_ctc=44.682, loss_att=49.225, acc=0.745, loss=47.862, backward_time=0.403, grad_norm=34.356, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.068e-04, train_time=1.663 +[gpub058:0/16] 2024-02-03 15:59:33,568 (trainer:737) INFO: 24epoch:train:5701-5800batch: iter_time=8.065e-05, forward_time=0.394, loss_ctc=46.768, loss_att=48.832, acc=0.741, loss=48.213, backward_time=0.459, grad_norm=33.568, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.068e-04, train_time=1.605 
+[gpub058:0/16] 2024-02-03 16:02:08,191 (trainer:737) INFO: 24epoch:train:5801-5900batch: iter_time=7.800e-05, forward_time=0.346, loss_ctc=49.982, loss_att=55.268, acc=0.724, loss=53.682, backward_time=0.417, grad_norm=40.986, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.068e-04, train_time=1.546 +[gpub058:0/16] 2024-02-03 16:05:05,411 (trainer:737) INFO: 24epoch:train:5901-6000batch: iter_time=8.304e-05, forward_time=0.386, loss_ctc=54.547, loss_att=52.350, acc=0.747, loss=53.009, backward_time=0.422, grad_norm=50.759, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.068e-04, train_time=1.772 +[gpub058:0/16] 2024-02-03 16:07:44,537 (trainer:737) INFO: 24epoch:train:6001-6100batch: iter_time=8.254e-05, forward_time=0.383, loss_ctc=48.893, loss_att=51.889, acc=0.754, loss=50.990, backward_time=0.443, grad_norm=34.096, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.067e-04, train_time=1.591 +[gpub058:0/16] 2024-02-03 16:10:25,893 (trainer:737) INFO: 24epoch:train:6101-6200batch: iter_time=3.960e-04, forward_time=0.308, loss_ctc=49.443, loss_att=42.491, acc=0.770, loss=44.577, backward_time=0.440, grad_norm=39.675, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.067e-04, train_time=1.613 +[gpub058:0/16] 2024-02-03 16:12:03,798 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub058:0/16] 2024-02-03 16:12:22,467 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 16:12:26,090 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 16:12:26,090 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub058:0/16] 2024-02-03 16:12:26,093 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 16:18:14,228 (trainer:737) INFO: 24epoch:train:6201-6300batch: iter_time=3.136, forward_time=0.374, loss_ctc=55.669, loss_att=59.973, acc=0.728, loss=58.682, backward_time=0.443, grad_norm=39.506, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.067e-04, train_time=4.684 +[gpub058:0/16] 2024-02-03 16:21:23,746 (trainer:737) INFO: 24epoch:train:6301-6400batch: iter_time=8.445e-05, forward_time=0.405, loss_ctc=44.937, loss_att=40.707, acc=0.760, loss=41.976, backward_time=0.423, grad_norm=34.873, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.119, optim0_lr0=2.066e-04, train_time=1.895 +[gpub058:0/16] 2024-02-03 16:23:54,237 (trainer:737) INFO: 24epoch:train:6401-6500batch: iter_time=7.508e-05, forward_time=0.404, loss_ctc=49.909, loss_att=53.572, acc=0.745, loss=52.473, backward_time=0.447, grad_norm=36.013, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.066e-04, train_time=1.505 +[gpub058:0/16] 2024-02-03 16:27:02,992 (trainer:737) INFO: 
24epoch:train:6501-6600batch: iter_time=2.485e-04, forward_time=0.372, loss_ctc=44.604, loss_att=42.053, acc=0.757, loss=42.818, backward_time=0.431, grad_norm=34.941, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.066e-04, train_time=1.887 +[gpub058:0/16] 2024-02-03 16:29:52,810 (trainer:737) INFO: 24epoch:train:6601-6700batch: iter_time=8.814e-05, forward_time=0.462, loss_ctc=47.550, loss_att=52.694, acc=0.734, loss=51.151, backward_time=0.441, grad_norm=33.912, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.066e-04, train_time=1.698 +[gpub058:0/16] 2024-02-03 16:33:01,146 (trainer:737) INFO: 24epoch:train:6701-6800batch: iter_time=9.158e-04, forward_time=0.424, loss_ctc=44.871, loss_att=48.458, acc=0.763, loss=47.382, backward_time=0.430, grad_norm=35.066, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=2.065e-04, train_time=1.882 +[gpub058:0/16] 2024-02-03 16:35:50,005 (trainer:737) INFO: 24epoch:train:6801-6900batch: iter_time=8.879e-05, forward_time=0.294, loss_ctc=42.675, loss_att=37.800, acc=0.760, loss=39.262, backward_time=0.404, grad_norm=58.051, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.065e-04, train_time=1.690 +[gpub058:0/16] 2024-02-03 16:38:53,332 (trainer:737) INFO: 24epoch:train:6901-7000batch: iter_time=8.701e-05, forward_time=0.409, loss_ctc=49.692, loss_att=59.382, acc=0.722, loss=56.475, backward_time=0.425, grad_norm=38.211, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.065e-04, train_time=1.832 +[gpub058:0/16] 2024-02-03 16:42:02,245 (trainer:737) INFO: 24epoch:train:7001-7100batch: iter_time=4.026e-04, forward_time=0.444, loss_ctc=44.683, loss_att=48.270, acc=0.737, loss=47.194, backward_time=0.422, grad_norm=37.851, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.064e-04, train_time=1.890 +[gpub058:0/16] 2024-02-03 16:44:15,340 (trainer:737) INFO: 24epoch:train:7101-7200batch: 
iter_time=8.643e-05, forward_time=0.297, loss_ctc=49.876, loss_att=50.326, acc=0.744, loss=50.191, backward_time=0.405, grad_norm=36.826, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.064e-04, train_time=1.330 +[gpub058:0/16] 2024-02-03 16:47:16,356 (trainer:737) INFO: 24epoch:train:7201-7300batch: iter_time=2.637e-04, forward_time=0.401, loss_ctc=55.453, loss_att=53.706, acc=0.757, loss=54.230, backward_time=0.421, grad_norm=46.917, clip=100.000, loss_scale=9.813e+33, optim_step_time=0.094, optim0_lr0=2.064e-04, train_time=1.811 +[gpub058:0/16] 2024-02-03 16:49:28,570 (trainer:737) INFO: 24epoch:train:7301-7400batch: iter_time=8.402e-05, forward_time=0.293, loss_ctc=50.542, loss_att=48.668, acc=0.768, loss=49.230, backward_time=0.410, grad_norm=38.310, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.063e-04, train_time=1.322 +[gpub058:0/16] 2024-02-03 16:52:05,900 (trainer:737) INFO: 24epoch:train:7401-7500batch: iter_time=2.946e-04, forward_time=0.365, loss_ctc=52.136, loss_att=52.942, acc=0.725, loss=52.700, backward_time=0.449, grad_norm=40.749, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.063e-04, train_time=1.573 +[gpub058:0/16] 2024-02-03 16:52:26,066 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub058:0/16] 2024-02-03 16:52:46,086 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 16:52:49,696 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 16:52:49,696 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub058:0/16] 2024-02-03 16:52:49,700 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 17:00:02,342 (trainer:737) INFO: 24epoch:train:7501-7600batch: iter_time=3.053, forward_time=0.288, loss_ctc=47.236, loss_att=47.622, acc=0.758, loss=47.506, backward_time=0.403, grad_norm=37.050, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.063e-04, train_time=4.764 +[gpub058:0/16] 2024-02-03 17:02:35,432 (trainer:737) INFO: 24epoch:train:7601-7700batch: iter_time=3.354e-04, forward_time=0.400, loss_ctc=45.265, loss_att=42.701, acc=0.757, loss=43.470, backward_time=0.432, grad_norm=33.917, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.063e-04, train_time=1.531 +[gpub058:0/16] 2024-02-03 17:05:44,076 (trainer:737) INFO: 24epoch:train:7701-7800batch: iter_time=8.423e-05, forward_time=0.293, loss_ctc=49.854, loss_att=51.621, acc=0.750, loss=51.091, backward_time=0.407, grad_norm=37.846, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.062e-04, train_time=1.886 +[gpub058:0/16] 2024-02-03 17:07:54,606 (trainer:737) INFO: 
24epoch:train:7801-7900batch: iter_time=8.698e-05, forward_time=0.296, loss_ctc=45.100, loss_att=45.868, acc=0.748, loss=45.638, backward_time=0.403, grad_norm=35.277, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.062e-04, train_time=1.305 +[gpub058:0/16] 2024-02-03 17:10:57,694 (trainer:737) INFO: 24epoch:train:7901-8000batch: iter_time=9.106e-05, forward_time=0.397, loss_ctc=47.129, loss_att=52.191, acc=0.753, loss=50.672, backward_time=0.429, grad_norm=34.125, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.062e-04, train_time=1.830 +[gpub058:0/16] 2024-02-03 17:13:12,065 (trainer:737) INFO: 24epoch:train:8001-8100batch: iter_time=8.813e-05, forward_time=0.294, loss_ctc=44.685, loss_att=43.386, acc=0.758, loss=43.776, backward_time=0.403, grad_norm=34.992, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.061e-04, train_time=1.344 +[gpub058:0/16] 2024-02-03 17:15:56,146 (trainer:737) INFO: 24epoch:train:8101-8200batch: iter_time=2.467e-04, forward_time=0.387, loss_ctc=44.659, loss_att=48.066, acc=0.748, loss=47.044, backward_time=0.446, grad_norm=33.357, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.061e-04, train_time=1.641 +[gpub058:0/16] 2024-02-03 17:18:41,409 (trainer:737) INFO: 24epoch:train:8201-8300batch: iter_time=8.094e-05, forward_time=0.290, loss_ctc=46.583, loss_att=48.421, acc=0.744, loss=47.870, backward_time=0.404, grad_norm=34.707, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.061e-04, train_time=1.652 +[gpub058:0/16] 2024-02-03 17:20:58,495 (trainer:737) INFO: 24epoch:train:8301-8400batch: iter_time=8.239e-05, forward_time=0.329, loss_ctc=49.925, loss_att=55.443, acc=0.726, loss=53.787, backward_time=0.418, grad_norm=39.620, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.061e-04, train_time=1.370 +[gpub058:0/16] 2024-02-03 17:23:57,691 (trainer:737) INFO: 24epoch:train:8401-8500batch: 
iter_time=3.870e-04, forward_time=0.385, loss_ctc=53.928, loss_att=51.887, acc=0.750, loss=52.500, backward_time=0.429, grad_norm=51.232, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.060e-04, train_time=1.792 +[gpub058:0/16] 2024-02-03 17:24:44,631 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 17:26:32,028 (trainer:737) INFO: 24epoch:train:8501-8600batch: iter_time=8.029e-05, forward_time=0.297, loss_ctc=48.494, loss_att=52.188, acc=0.754, loss=51.080, backward_time=0.407, grad_norm=35.897, clip=100.000, loss_scale=6.084e+33, optim_step_time=0.092, optim0_lr0=2.060e-04, train_time=1.544 +[gpub058:0/16] 2024-02-03 17:29:18,859 (trainer:737) INFO: 24epoch:train:8601-8700batch: iter_time=9.965e-05, forward_time=0.439, loss_ctc=49.236, loss_att=42.463, acc=0.771, loss=44.495, backward_time=0.427, grad_norm=38.497, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.060e-04, train_time=1.668 +[gpub058:0/16] 2024-02-03 17:30:44,834 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub058:0/16] 2024-02-03 17:31:04,022 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 17:31:07,695 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 17:31:07,696 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub058:0/16] 2024-02-03 17:31:07,699 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 17:37:12,084 (trainer:737) INFO: 24epoch:train:8701-8800batch: iter_time=3.167, forward_time=0.297, loss_ctc=55.351, loss_att=61.790, acc=0.716, loss=59.858, backward_time=0.407, grad_norm=41.544, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.059e-04, train_time=4.732 +[gpub058:0/16] 2024-02-03 17:39:53,321 (trainer:737) INFO: 24epoch:train:8801-8900batch: iter_time=7.767e-05, forward_time=0.381, loss_ctc=44.589, loss_att=41.926, acc=0.756, loss=42.725, backward_time=0.441, grad_norm=35.557, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.059e-04, train_time=1.611 +[gpub058:0/16] 2024-02-03 17:42:25,091 (trainer:737) INFO: 24epoch:train:8901-9000batch: iter_time=8.109e-05, forward_time=0.298, loss_ctc=49.489, loss_att=53.300, acc=0.742, loss=52.156, backward_time=0.404, grad_norm=38.143, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.059e-04, train_time=1.517 +[gpub058:0/16] 2024-02-03 17:44:55,968 (trainer:737) INFO: 
24epoch:train:9001-9100batch: iter_time=8.269e-05, forward_time=0.288, loss_ctc=44.115, loss_att=42.963, acc=0.745, loss=43.309, backward_time=0.401, grad_norm=35.684, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.058e-04, train_time=1.510 +[gpub058:0/16] 2024-02-03 17:47:41,233 (trainer:737) INFO: 24epoch:train:9101-9200batch: iter_time=9.050e-05, forward_time=0.412, loss_ctc=47.523, loss_att=52.749, acc=0.727, loss=51.181, backward_time=0.423, grad_norm=36.536, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.058e-04, train_time=1.652 +[gpub058:0/16] 2024-02-03 17:50:10,239 (trainer:737) INFO: 24epoch:train:9201-9300batch: iter_time=9.408e-05, forward_time=0.307, loss_ctc=44.281, loss_att=48.520, acc=0.754, loss=47.248, backward_time=0.413, grad_norm=36.133, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.058e-04, train_time=1.489 +[gpub058:0/16] 2024-02-03 17:52:39,552 (trainer:737) INFO: 24epoch:train:9301-9400batch: iter_time=8.587e-05, forward_time=0.296, loss_ctc=42.664, loss_att=38.226, acc=0.751, loss=39.557, backward_time=0.399, grad_norm=34.493, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.058e-04, train_time=1.494 +[gpub058:0/16] 2024-02-03 17:55:28,029 (trainer:737) INFO: 24epoch:train:9401-9500batch: iter_time=8.526e-05, forward_time=0.413, loss_ctc=48.848, loss_att=56.880, acc=0.725, loss=54.470, backward_time=0.428, grad_norm=38.560, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.057e-04, train_time=1.684 +[gpub058:0/16] 2024-02-03 17:57:39,626 (trainer:737) INFO: 24epoch:train:9501-9600batch: iter_time=8.882e-05, forward_time=0.293, loss_ctc=44.276, loss_att=48.104, acc=0.733, loss=46.955, backward_time=0.403, grad_norm=36.351, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.057e-04, train_time=1.316 +[gpub058:0/16] 2024-02-03 18:00:25,047 (trainer:737) INFO: 24epoch:train:9601-9700batch: 
iter_time=8.607e-05, forward_time=0.297, loss_ctc=50.062, loss_att=49.045, acc=0.728, loss=49.350, backward_time=0.406, grad_norm=39.295, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.057e-04, train_time=1.654 +[gpub058:0/16] 2024-02-03 18:03:06,267 (trainer:737) INFO: 24epoch:train:9701-9800batch: iter_time=0.002, forward_time=0.401, loss_ctc=55.371, loss_att=52.973, acc=0.752, loss=53.692, backward_time=0.424, grad_norm=47.205, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.056e-04, train_time=1.612 +[gpub058:0/16] 2024-02-03 18:05:16,870 (trainer:737) INFO: 24epoch:train:9801-9900batch: iter_time=8.809e-05, forward_time=0.299, loss_ctc=49.646, loss_att=47.978, acc=0.763, loss=48.478, backward_time=0.405, grad_norm=38.266, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.056e-04, train_time=1.306 +[gpub058:0/16] 2024-02-03 18:08:00,294 (trainer:737) INFO: 24epoch:train:9901-10000batch: iter_time=9.122e-05, forward_time=0.308, loss_ctc=52.019, loss_att=53.119, acc=0.718, loss=52.789, backward_time=0.430, grad_norm=41.042, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.056e-04, train_time=1.634 +[gpub058:0/16] 2024-02-03 18:08:20,549 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub058:0/16] 2024-02-03 18:08:39,661 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 18:08:43,206 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 18:08:43,206 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-03 18:08:43,209 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 18:15:39,301 (trainer:737) INFO: 24epoch:train:10001-10100batch: iter_time=3.169, forward_time=0.366, loss_ctc=46.983, loss_att=47.896, acc=0.747, loss=47.622, backward_time=0.416, grad_norm=36.291, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.056e-04, train_time=4.590 +[gpub058:0/16] 2024-02-03 18:18:22,290 (trainer:737) INFO: 24epoch:train:10101-10200batch: iter_time=7.882e-05, forward_time=0.297, loss_ctc=45.273, loss_att=41.688, acc=0.762, loss=42.764, backward_time=0.400, grad_norm=34.346, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.055e-04, train_time=1.630 +[gpub058:0/16] 2024-02-03 18:20:51,170 (trainer:737) INFO: 24epoch:train:10201-10300batch: iter_time=9.233e-05, forward_time=0.414, loss_ctc=49.450, loss_att=51.228, acc=0.739, loss=50.695, backward_time=0.442, grad_norm=38.358, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.055e-04, train_time=1.489 +[gpub058:0/16] 2024-02-03 18:23:37,571 (trainer:737) 
INFO: 24epoch:train:10301-10400batch: iter_time=9.190e-05, forward_time=0.293, loss_ctc=44.499, loss_att=45.467, acc=0.737, loss=45.177, backward_time=0.400, grad_norm=34.694, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.055e-04, train_time=1.664 +[gpub058:0/16] 2024-02-03 18:26:06,139 (trainer:737) INFO: 24epoch:train:10401-10500batch: iter_time=8.510e-05, forward_time=0.295, loss_ctc=47.076, loss_att=53.068, acc=0.741, loss=51.270, backward_time=0.404, grad_norm=36.614, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.054e-04, train_time=1.486 +[gpub058:0/16] 2024-02-03 18:28:51,755 (trainer:737) INFO: 24epoch:train:10501-10600batch: iter_time=8.587e-05, forward_time=0.344, loss_ctc=44.598, loss_att=43.409, acc=0.749, loss=43.766, backward_time=0.468, grad_norm=35.470, clip=100.000, loss_scale=9.450e+33, optim_step_time=0.100, optim0_lr0=2.054e-04, train_time=1.656 +[gpub058:0/16] 2024-02-03 18:31:28,060 (trainer:737) INFO: 24epoch:train:10601-10700batch: iter_time=8.310e-05, forward_time=0.288, loss_ctc=44.064, loss_att=48.367, acc=0.738, loss=47.076, backward_time=0.409, grad_norm=36.131, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.054e-04, train_time=1.563 +[gpub058:0/16] 2024-02-03 18:34:10,844 (trainer:737) INFO: 24epoch:train:10701-10800batch: iter_time=8.938e-05, forward_time=0.290, loss_ctc=46.337, loss_att=46.073, acc=0.746, loss=46.152, backward_time=0.403, grad_norm=32.955, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.054e-04, train_time=1.627 +[gpub058:0/16] 2024-02-03 18:36:46,412 (trainer:737) INFO: 24epoch:train:10801-10900batch: iter_time=3.209e-04, forward_time=0.412, loss_ctc=49.381, loss_att=53.555, acc=0.719, loss=52.303, backward_time=0.447, grad_norm=39.028, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.053e-04, train_time=1.554 +[gpub058:0/16] 2024-02-03 18:39:42,674 (trainer:737) INFO: 
24epoch:train:10901-11000batch: iter_time=9.056e-05, forward_time=0.297, loss_ctc=54.214, loss_att=50.100, acc=0.740, loss=51.334, backward_time=0.404, grad_norm=49.457, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.053e-04, train_time=1.764 +[gpub058:0/16] 2024-02-03 18:42:06,257 (trainer:737) INFO: 24epoch:train:11001-11100batch: iter_time=8.998e-05, forward_time=0.298, loss_ctc=48.042, loss_att=50.867, acc=0.750, loss=50.020, backward_time=0.404, grad_norm=34.969, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.053e-04, train_time=1.436 +[gpub058:0/16] 2024-02-03 18:45:07,128 (trainer:737) INFO: 24epoch:train:11101-11200batch: iter_time=8.882e-05, forward_time=0.405, loss_ctc=48.909, loss_att=42.141, acc=0.766, loss=44.171, backward_time=0.427, grad_norm=42.649, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=2.052e-04, train_time=1.807 +[gpub058:0/16] 2024-02-03 18:46:34,206 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub058:0/16] 2024-02-03 18:46:53,459 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 18:46:57,087 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 18:46:57,087 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-03 18:46:57,090 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 18:53:04,004 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-03 18:53:06,652 (trainer:737) INFO: 24epoch:train:11201-11300batch: iter_time=3.321, forward_time=0.294, loss_ctc=55.348, loss_att=60.903, acc=0.717, loss=59.237, backward_time=0.405, grad_norm=38.556, clip=100.000, loss_scale=1.028e+34, optim_step_time=0.092, optim0_lr0=2.052e-04, train_time=4.796 +[gpub058:0/16] 2024-02-03 18:55:24,527 (trainer:737) INFO: 24epoch:train:11301-11400batch: iter_time=8.317e-05, forward_time=0.324, loss_ctc=44.743, loss_att=41.233, acc=0.756, loss=42.286, backward_time=0.423, grad_norm=36.397, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.052e-04, train_time=1.378 +[gpub058:0/16] 2024-02-03 18:58:32,215 (trainer:737) INFO: 24epoch:train:11401-11500batch: iter_time=8.455e-05, forward_time=0.332, loss_ctc=49.500, loss_att=50.765, acc=0.749, loss=50.386, backward_time=0.410, grad_norm=37.837, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.052e-04, train_time=1.876 +[gpub058:0/16] 2024-02-03 19:00:42,173 (trainer:737) INFO: 24epoch:train:11501-11600batch: iter_time=9.300e-05, forward_time=0.288, loss_ctc=44.529, loss_att=41.366, acc=0.748, loss=42.315, backward_time=0.401, grad_norm=34.757, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.051e-04, train_time=1.300 +[gpub058:0/16] 2024-02-03 19:03:38,492 (trainer:737) INFO: 24epoch:train:11601-11700batch: iter_time=8.667e-05, forward_time=0.335, loss_ctc=47.115, loss_att=51.446, acc=0.729, loss=50.147, backward_time=0.419, grad_norm=34.679, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.051e-04, train_time=1.763 +[gpub058:0/16] 2024-02-03 19:06:42,930 (trainer:737) INFO: 24epoch:train:11701-11800batch: iter_time=8.763e-05, forward_time=0.357, loss_ctc=44.030, loss_att=48.105, acc=0.754, loss=46.883, backward_time=0.414, grad_norm=35.114, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.051e-04, train_time=1.843 +[gpub058:0/16] 2024-02-03 
19:09:07,084 (trainer:737) INFO: 24epoch:train:11801-11900batch: iter_time=8.394e-05, forward_time=0.286, loss_ctc=42.146, loss_att=37.797, acc=0.752, loss=39.101, backward_time=0.399, grad_norm=33.631, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.050e-04, train_time=1.442 +[gpub058:0/16] 2024-02-03 19:12:23,668 (trainer:737) INFO: 24epoch:train:11901-12000batch: iter_time=5.321e-04, forward_time=0.387, loss_ctc=48.384, loss_att=55.714, acc=0.728, loss=53.515, backward_time=0.425, grad_norm=36.123, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.050e-04, train_time=1.963 +[gpub058:0/16] 2024-02-03 19:14:56,721 (trainer:737) INFO: 24epoch:train:12001-12100batch: iter_time=8.127e-05, forward_time=0.290, loss_ctc=44.290, loss_att=48.293, acc=0.733, loss=47.092, backward_time=0.406, grad_norm=37.639, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.050e-04, train_time=1.530 +[gpub058:0/16] 2024-02-03 19:17:28,046 (trainer:737) INFO: 24epoch:train:12101-12200batch: iter_time=8.245e-05, forward_time=0.289, loss_ctc=49.849, loss_att=47.216, acc=0.734, loss=48.006, backward_time=0.405, grad_norm=39.244, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.050e-04, train_time=1.514 +[gpub058:0/16] 2024-02-03 19:20:32,061 (trainer:737) INFO: 24epoch:train:12201-12300batch: iter_time=5.529e-04, forward_time=0.348, loss_ctc=54.538, loss_att=53.120, acc=0.753, loss=53.545, backward_time=0.479, grad_norm=48.346, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.049e-04, train_time=1.840 +[gpub058:0/16] 2024-02-03 19:23:04,244 (trainer:737) INFO: 24epoch:train:12301-12400batch: iter_time=8.902e-05, forward_time=0.291, loss_ctc=49.364, loss_att=47.364, acc=0.765, loss=47.964, backward_time=0.407, grad_norm=38.544, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.049e-04, train_time=1.521 +[gpub058:0/16] 2024-02-03 19:25:37,655 (trainer:737) 
INFO: 24epoch:train:12401-12500batch: iter_time=8.089e-05, forward_time=0.291, loss_ctc=51.821, loss_att=51.598, acc=0.724, loss=51.665, backward_time=0.407, grad_norm=40.416, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.049e-04, train_time=1.535 +[gpub058:0/16] 2024-02-03 19:25:57,684 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub058:0/16] 2024-02-03 19:26:17,045 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 19:26:20,652 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 19:26:20,652 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub058:0/16] 2024-02-03 19:26:20,655 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 19:33:53,022 (trainer:737) INFO: 24epoch:train:12501-12600batch: iter_time=3.319, forward_time=0.377, loss_ctc=46.813, loss_att=47.716, acc=0.746, loss=47.445, backward_time=0.417, grad_norm=34.963, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.048e-04, train_time=4.953 +[gpub058:0/16] 2024-02-03 19:36:35,364 (trainer:737) INFO: 24epoch:train:12601-12700batch: iter_time=7.872e-05, forward_time=0.289, loss_ctc=45.281, loss_att=41.755, acc=0.763, loss=42.812, backward_time=0.403, grad_norm=35.145, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.048e-04, train_time=1.623 
+[gpub058:0/16] 2024-02-03 19:39:11,528 (trainer:737) INFO: 24epoch:train:12701-12800batch: iter_time=8.095e-05, forward_time=0.329, loss_ctc=49.847, loss_att=50.103, acc=0.744, loss=50.026, backward_time=0.453, grad_norm=38.465, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.048e-04, train_time=1.562 +[gpub058:0/16] 2024-02-03 19:41:45,164 (trainer:737) INFO: 24epoch:train:12801-12900batch: iter_time=8.290e-05, forward_time=0.291, loss_ctc=44.787, loss_att=45.030, acc=0.739, loss=44.958, backward_time=0.409, grad_norm=35.019, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.048e-04, train_time=1.536 +[gpub058:0/16] 2024-02-03 19:44:27,580 (trainer:737) INFO: 24epoch:train:12901-13000batch: iter_time=8.760e-05, forward_time=0.290, loss_ctc=46.786, loss_att=53.061, acc=0.742, loss=51.179, backward_time=0.402, grad_norm=35.624, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.047e-04, train_time=1.624 +[gpub058:0/16] 2024-02-03 19:47:19,051 (trainer:737) INFO: 24epoch:train:13001-13100batch: iter_time=2.781e-04, forward_time=0.354, loss_ctc=44.114, loss_att=43.651, acc=0.747, loss=43.790, backward_time=0.480, grad_norm=35.624, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.047e-04, train_time=1.715 +[gpub058:0/16] 2024-02-03 19:49:51,840 (trainer:737) INFO: 24epoch:train:13101-13200batch: iter_time=8.872e-05, forward_time=0.290, loss_ctc=44.125, loss_att=48.182, acc=0.741, loss=46.965, backward_time=0.401, grad_norm=34.399, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.047e-04, train_time=1.527 +[gpub058:0/16] 2024-02-03 19:52:29,628 (trainer:737) INFO: 24epoch:train:13201-13300batch: iter_time=8.455e-05, forward_time=0.289, loss_ctc=46.562, loss_att=46.274, acc=0.746, loss=46.361, backward_time=0.401, grad_norm=33.331, clip=100.000, loss_scale=5.296e+33, optim_step_time=0.092, optim0_lr0=2.046e-04, train_time=1.577 +[gpub058:0/16] 2024-02-03 
19:55:17,460 (trainer:737) INFO: 24epoch:train:13301-13400batch: iter_time=6.495e-04, forward_time=0.419, loss_ctc=48.685, loss_att=52.906, acc=0.721, loss=51.640, backward_time=0.424, grad_norm=40.242, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.046e-04, train_time=1.679 +[gpub058:0/16] 2024-02-03 19:57:55,008 (trainer:737) INFO: 24epoch:train:13401-13500batch: iter_time=8.422e-05, forward_time=0.289, loss_ctc=53.082, loss_att=49.837, acc=0.742, loss=50.810, backward_time=0.408, grad_norm=46.790, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.046e-04, train_time=1.575 +[gpub058:0/16] 2024-02-03 20:00:45,911 (trainer:737) INFO: 24epoch:train:13501-13600batch: iter_time=8.489e-05, forward_time=0.292, loss_ctc=47.932, loss_att=50.503, acc=0.751, loss=49.732, backward_time=0.404, grad_norm=36.238, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.046e-04, train_time=1.707 +[gpub058:0/16] 2024-02-03 20:03:28,848 (trainer:737) INFO: 24epoch:train:13601-13700batch: iter_time=8.862e-05, forward_time=0.399, loss_ctc=48.427, loss_att=41.377, acc=0.770, loss=43.492, backward_time=0.441, grad_norm=38.642, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.045e-04, train_time=1.631 +[gpub058:0/16] 2024-02-03 20:05:25,100 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub058:0/16] 2024-02-03 20:05:44,346 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 20:05:47,911 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 20:05:47,911 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub058:0/16] 2024-02-03 20:05:48,055 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 20:11:56,171 (trainer:737) INFO: 24epoch:train:13701-13800batch: iter_time=3.214, forward_time=0.290, loss_ctc=55.052, loss_att=60.370, acc=0.725, loss=58.774, backward_time=0.403, grad_norm=40.340, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.045e-04, train_time=5.073 +[gpub058:0/16] 2024-02-03 20:15:08,185 (trainer:737) INFO: 24epoch:train:13801-13900batch: iter_time=8.562e-05, forward_time=0.372, loss_ctc=44.474, loss_att=41.762, acc=0.761, loss=42.575, backward_time=0.428, grad_norm=34.972, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.045e-04, train_time=1.920 +[gpub058:0/16] 2024-02-03 20:17:48,999 (trainer:737) INFO: 24epoch:train:13901-14000batch: iter_time=8.108e-05, forward_time=0.296, loss_ctc=49.222, loss_att=55.075, acc=0.746, loss=53.319, backward_time=0.407, grad_norm=39.689, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.044e-04, train_time=1.608 +[gpub058:0/16] 2024-02-03 20:20:34,478 (trainer:737) 
INFO: 24epoch:train:14001-14100batch: iter_time=8.341e-05, forward_time=0.289, loss_ctc=44.251, loss_att=42.342, acc=0.757, loss=42.915, backward_time=0.401, grad_norm=34.909, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.044e-04, train_time=1.654 +[gpub058:0/16] 2024-02-03 20:23:26,867 (trainer:737) INFO: 24epoch:train:14101-14200batch: iter_time=8.028e-05, forward_time=0.416, loss_ctc=46.736, loss_att=51.995, acc=0.737, loss=50.417, backward_time=0.434, grad_norm=36.554, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.044e-04, train_time=1.724 +[gpub058:0/16] 2024-02-03 20:25:47,653 (trainer:737) INFO: 24epoch:train:14201-14300batch: iter_time=8.773e-05, forward_time=0.291, loss_ctc=44.050, loss_att=48.783, acc=0.763, loss=47.363, backward_time=0.404, grad_norm=35.169, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.044e-04, train_time=1.407 +[gpub058:0/16] 2024-02-03 20:28:53,446 (trainer:737) INFO: 24epoch:train:14301-14400batch: iter_time=9.256e-05, forward_time=0.286, loss_ctc=41.713, loss_att=37.358, acc=0.762, loss=38.665, backward_time=0.398, grad_norm=33.280, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.043e-04, train_time=1.857 +[gpub058:0/16] 2024-02-03 20:31:52,810 (trainer:737) INFO: 24epoch:train:14401-14500batch: iter_time=8.539e-05, forward_time=0.365, loss_ctc=48.004, loss_att=59.148, acc=0.724, loss=55.805, backward_time=0.459, grad_norm=36.480, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.043e-04, train_time=1.794 +[gpub058:0/16] 2024-02-03 20:34:44,159 (trainer:737) INFO: 24epoch:train:14501-14600batch: iter_time=8.340e-05, forward_time=0.288, loss_ctc=43.724, loss_att=47.952, acc=0.741, loss=46.684, backward_time=0.401, grad_norm=35.428, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.043e-04, train_time=1.713 +[gpub058:0/16] 2024-02-03 20:37:56,322 (trainer:737) INFO: 
24epoch:train:14601-14700batch: iter_time=8.695e-05, forward_time=0.389, loss_ctc=49.474, loss_att=50.647, acc=0.744, loss=50.295, backward_time=0.440, grad_norm=36.562, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.042e-04, train_time=1.922 +[gpub058:0/16] 2024-02-03 20:40:34,696 (trainer:737) INFO: 24epoch:train:14701-14800batch: iter_time=8.227e-05, forward_time=0.291, loss_ctc=54.558, loss_att=53.131, acc=0.759, loss=53.559, backward_time=0.406, grad_norm=47.286, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.042e-04, train_time=1.584 +[gpub058:0/16] 2024-02-03 20:43:46,423 (trainer:737) INFO: 24epoch:train:14801-14900batch: iter_time=8.172e-05, forward_time=0.390, loss_ctc=48.836, loss_att=48.508, acc=0.770, loss=48.606, backward_time=0.461, grad_norm=39.383, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.042e-04, train_time=1.917 +[gpub058:0/16] 2024-02-03 20:46:38,645 (trainer:737) INFO: 24epoch:train:14901-15000batch: iter_time=8.132e-05, forward_time=0.291, loss_ctc=51.345, loss_att=52.698, acc=0.727, loss=52.292, backward_time=0.407, grad_norm=39.764, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.042e-04, train_time=1.722 +[gpub058:0/16] 2024-02-03 21:24:59,484 (trainer:343) INFO: 24epoch results: [train] iter_time=0.255, forward_time=0.337, loss_ctc=48.334, loss_att=49.224, acc=0.744, loss=48.957, backward_time=0.420, grad_norm=38.298, clip=100.000, loss_scale=7.243e+33, optim_step_time=0.096, optim0_lr0=2.063e-04, train_time=1.870, time=7 hours, 47 minutes and 58.02 seconds, total_count=390000, gpu_max_cached_mem_GB=43.281, [valid] loss_ctc=39.756, cer_ctc=0.202, loss_att=41.411, acc=0.663, cer=0.327, wer=0.995, loss=40.915, time=37 minutes and 56.35 seconds, total_count=121446, gpu_max_cached_mem_GB=43.281 +[gpub058:0/16] 2024-02-03 21:25:10,898 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub058:0/16] 2024-02-03 
21:25:10,945 (trainer:272) INFO: 25/45epoch started. Estimated time to finish: 1 week, 6 hours and 4 minutes +[gpub058:0/16] 2024-02-03 21:25:10,954 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub058:0/16] 2024-02-03 21:25:29,421 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 21:25:33,138 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 21:25:33,138 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub058:0/16] 2024-02-03 21:25:33,141 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 21:32:39,656 (trainer:737) INFO: 25epoch:train:1-100batch: iter_time=3.015, forward_time=0.365, loss_ctc=44.779, loss_att=49.013, acc=0.739, loss=47.742, backward_time=0.411, grad_norm=38.287, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.041e-04, train_time=4.487 +[gpub058:0/16] 2024-02-03 21:35:13,938 (trainer:737) INFO: 25epoch:train:101-200batch: iter_time=2.041e-04, forward_time=0.392, loss_ctc=47.097, loss_att=38.440, acc=0.769, loss=41.037, backward_time=0.442, grad_norm=35.373, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=2.041e-04, train_time=1.542 +[gpub058:0/16] 2024-02-03 21:37:58,967 (trainer:737) INFO: 25epoch:train:201-300batch: iter_time=8.136e-05, forward_time=0.308, loss_ctc=46.894, loss_att=44.822, acc=0.761, 
loss=45.443, backward_time=0.421, grad_norm=35.860, clip=100.000, loss_scale=1.059e+34, optim_step_time=0.095, optim0_lr0=2.041e-04, train_time=1.650 +[gpub058:0/16] 2024-02-03 21:38:31,862 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 21:40:36,150 (trainer:737) INFO: 25epoch:train:301-400batch: iter_time=7.995e-05, forward_time=0.382, loss_ctc=60.282, loss_att=57.073, acc=0.729, loss=58.036, backward_time=0.428, grad_norm=46.972, clip=100.000, loss_scale=1.248e+34, optim_step_time=0.107, optim0_lr0=2.040e-04, train_time=1.572 +[gpub058:0/16] 2024-02-03 21:43:11,933 (trainer:737) INFO: 25epoch:train:401-500batch: iter_time=8.441e-05, forward_time=0.409, loss_ctc=48.332, loss_att=40.516, acc=0.748, loss=42.861, backward_time=0.458, grad_norm=39.056, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.040e-04, train_time=1.557 +[gpub058:0/16] 2024-02-03 21:45:43,597 (trainer:737) INFO: 25epoch:train:501-600batch: iter_time=1.724e-04, forward_time=0.321, loss_ctc=41.549, loss_att=41.175, acc=0.753, loss=41.287, backward_time=0.408, grad_norm=33.569, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.040e-04, train_time=1.516 +[gpub058:0/16] 2024-02-03 21:48:18,736 (trainer:737) INFO: 25epoch:train:601-700batch: iter_time=8.655e-05, forward_time=0.381, loss_ctc=52.466, loss_att=51.839, acc=0.734, loss=52.027, backward_time=0.437, grad_norm=40.070, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=2.040e-04, train_time=1.551 +[gpub058:0/16] 2024-02-03 21:51:12,964 (trainer:737) INFO: 25epoch:train:701-800batch: iter_time=8.679e-05, forward_time=0.351, loss_ctc=56.025, loss_att=53.938, acc=0.738, loss=54.564, backward_time=0.429, grad_norm=41.639, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.039e-04, train_time=1.742 +[gpub058:0/16] 2024-02-03 21:53:39,662 (trainer:737) INFO: 25epoch:train:801-900batch: iter_time=2.862e-04, 
forward_time=0.391, loss_ctc=56.006, loss_att=55.827, acc=0.741, loss=55.881, backward_time=0.443, grad_norm=46.199, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.039e-04, train_time=1.467 +[gpub058:0/16] 2024-02-03 21:56:40,954 (trainer:737) INFO: 25epoch:train:901-1000batch: iter_time=5.603e-04, forward_time=0.378, loss_ctc=51.883, loss_att=47.338, acc=0.751, loss=48.702, backward_time=0.460, grad_norm=41.179, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=2.039e-04, train_time=1.812 +[gpub058:0/16] 2024-02-03 21:59:25,895 (trainer:737) INFO: 25epoch:train:1001-1100batch: iter_time=8.882e-05, forward_time=0.418, loss_ctc=52.403, loss_att=47.385, acc=0.749, loss=48.890, backward_time=0.460, grad_norm=38.586, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.038e-04, train_time=1.650 +[gpub058:0/16] 2024-02-03 22:02:09,009 (trainer:737) INFO: 25epoch:train:1101-1200batch: iter_time=1.924e-04, forward_time=0.336, loss_ctc=53.728, loss_att=56.763, acc=0.743, loss=55.853, backward_time=0.414, grad_norm=37.852, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.038e-04, train_time=1.631 +[gpub058:0/16] 2024-02-03 22:03:55,040 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub058:0/16] 2024-02-03 22:04:14,039 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 22:04:17,650 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 22:04:17,650 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub058:0/16] 2024-02-03 22:04:17,653 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 22:10:41,577 (trainer:737) INFO: 25epoch:train:1201-1300batch: iter_time=3.527, forward_time=0.367, loss_ctc=46.027, loss_att=52.129, acc=0.734, loss=50.299, backward_time=0.432, grad_norm=37.250, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.038e-04, train_time=5.125 +[gpub058:0/16] 2024-02-03 22:13:18,580 (trainer:737) INFO: 25epoch:train:1301-1400batch: iter_time=8.042e-05, forward_time=0.307, loss_ctc=43.533, loss_att=37.971, acc=0.772, loss=39.639, backward_time=0.405, grad_norm=55.251, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.038e-04, train_time=1.570 +[gpub058:0/16] 2024-02-03 22:16:04,889 (trainer:737) INFO: 25epoch:train:1401-1500batch: iter_time=8.676e-05, forward_time=0.413, loss_ctc=47.729, loss_att=45.148, acc=0.759, loss=45.922, backward_time=0.436, grad_norm=36.541, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.037e-04, train_time=1.663 +[gpub058:0/16] 2024-02-03 22:18:31,990 (trainer:737) INFO: 
25epoch:train:1501-1600batch: iter_time=8.126e-05, forward_time=0.291, loss_ctc=50.535, loss_att=45.310, acc=0.759, loss=46.878, backward_time=0.406, grad_norm=38.442, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.037e-04, train_time=1.471 +[gpub058:0/16] 2024-02-03 22:21:11,245 (trainer:737) INFO: 25epoch:train:1601-1700batch: iter_time=8.420e-05, forward_time=0.308, loss_ctc=49.159, loss_att=47.523, acc=0.740, loss=48.014, backward_time=0.410, grad_norm=36.425, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.037e-04, train_time=1.593 +[gpub058:0/16] 2024-02-03 22:22:43,523 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-03 22:23:49,747 (trainer:737) INFO: 25epoch:train:1701-1800batch: iter_time=4.658e-04, forward_time=0.375, loss_ctc=49.400, loss_att=46.980, acc=0.743, loss=47.706, backward_time=0.429, grad_norm=37.616, clip=100.000, loss_scale=8.182e+33, optim_step_time=0.100, optim0_lr0=2.036e-04, train_time=1.585 +[gpub058:0/16] 2024-02-03 22:26:17,933 (trainer:737) INFO: 25epoch:train:1801-1900batch: iter_time=8.465e-05, forward_time=0.311, loss_ctc=51.117, loss_att=47.292, acc=0.742, loss=48.439, backward_time=0.411, grad_norm=40.334, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.036e-04, train_time=1.481 +[gpub058:0/16] 2024-02-03 22:28:59,074 (trainer:737) INFO: 25epoch:train:1901-2000batch: iter_time=8.313e-05, forward_time=0.311, loss_ctc=48.077, loss_att=50.444, acc=0.742, loss=49.734, backward_time=0.406, grad_norm=35.278, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.036e-04, train_time=1.612 +[gpub058:0/16] 2024-02-03 22:31:38,061 (trainer:737) INFO: 25epoch:train:2001-2100batch: iter_time=8.326e-05, forward_time=0.407, loss_ctc=57.757, loss_att=51.488, acc=0.743, loss=53.369, backward_time=0.445, grad_norm=44.687, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.036e-04, 
train_time=1.589 +[gpub058:0/16] 2024-02-03 22:34:23,137 (trainer:737) INFO: 25epoch:train:2101-2200batch: iter_time=8.137e-05, forward_time=0.318, loss_ctc=46.553, loss_att=50.620, acc=0.749, loss=49.400, backward_time=0.409, grad_norm=35.885, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.035e-04, train_time=1.651 +[gpub058:0/16] 2024-02-03 22:36:50,916 (trainer:737) INFO: 25epoch:train:2201-2300batch: iter_time=8.103e-05, forward_time=0.306, loss_ctc=53.501, loss_att=46.196, acc=0.747, loss=48.388, backward_time=0.415, grad_norm=44.445, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.035e-04, train_time=1.477 +[gpub058:0/16] 2024-02-03 22:39:55,175 (trainer:737) INFO: 25epoch:train:2301-2400batch: iter_time=8.085e-05, forward_time=0.393, loss_ctc=55.561, loss_att=55.237, acc=0.754, loss=55.334, backward_time=0.461, grad_norm=37.288, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.035e-04, train_time=1.843 +[gpub058:0/16] 2024-02-03 22:42:22,001 (trainer:737) INFO: 25epoch:train:2401-2500batch: iter_time=7.909e-05, forward_time=0.304, loss_ctc=47.600, loss_att=49.971, acc=0.743, loss=49.260, backward_time=0.405, grad_norm=35.467, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.035e-04, train_time=1.468 +[gpub058:0/16] 2024-02-03 22:42:42,029 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub058:0/16] 2024-02-03 22:43:01,069 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 22:43:04,712 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 22:43:04,712 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub058:0/16] 2024-02-03 22:43:04,715 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 22:50:35,883 (trainer:737) INFO: 25epoch:train:2501-2600batch: iter_time=3.302, forward_time=0.402, loss_ctc=44.148, loss_att=48.782, acc=0.742, loss=47.392, backward_time=0.427, grad_norm=39.106, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.034e-04, train_time=4.938 +[gpub058:0/16] 2024-02-03 22:53:21,232 (trainer:737) INFO: 25epoch:train:2601-2700batch: iter_time=8.005e-05, forward_time=0.290, loss_ctc=45.940, loss_att=37.851, acc=0.771, loss=40.278, backward_time=0.402, grad_norm=34.079, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.034e-04, train_time=1.654 +[gpub058:0/16] 2024-02-03 22:56:14,486 (trainer:737) INFO: 25epoch:train:2701-2800batch: iter_time=8.119e-05, forward_time=0.421, loss_ctc=45.641, loss_att=44.120, acc=0.761, loss=44.576, backward_time=0.428, grad_norm=34.640, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.034e-04, train_time=1.732 +[gpub058:0/16] 2024-02-03 22:58:46,050 (trainer:737) INFO: 
25epoch:train:2801-2900batch: iter_time=8.117e-05, forward_time=0.398, loss_ctc=56.629, loss_att=56.206, acc=0.733, loss=56.333, backward_time=0.420, grad_norm=40.922, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=2.033e-04, train_time=1.516 +[gpub058:0/16] 2024-02-03 23:01:47,074 (trainer:737) INFO: 25epoch:train:2901-3000batch: iter_time=8.998e-05, forward_time=0.320, loss_ctc=45.218, loss_att=38.816, acc=0.755, loss=40.736, backward_time=0.422, grad_norm=34.181, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.033e-04, train_time=1.810 +[gpub058:0/16] 2024-02-03 23:04:25,417 (trainer:737) INFO: 25epoch:train:3001-3100batch: iter_time=2.165e-04, forward_time=0.473, loss_ctc=41.266, loss_att=41.366, acc=0.753, loss=41.336, backward_time=0.421, grad_norm=33.889, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.033e-04, train_time=1.584 +[gpub058:0/16] 2024-02-03 23:07:11,678 (trainer:737) INFO: 25epoch:train:3101-3200batch: iter_time=9.226e-05, forward_time=0.426, loss_ctc=50.359, loss_att=50.614, acc=0.739, loss=50.538, backward_time=0.435, grad_norm=40.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.033e-04, train_time=1.662 +[gpub058:0/16] 2024-02-03 23:10:10,172 (trainer:737) INFO: 25epoch:train:3201-3300batch: iter_time=8.469e-05, forward_time=0.291, loss_ctc=53.507, loss_att=52.805, acc=0.746, loss=53.015, backward_time=0.403, grad_norm=38.455, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.032e-04, train_time=1.785 +[gpub058:0/16] 2024-02-03 23:12:40,640 (trainer:737) INFO: 25epoch:train:3301-3400batch: iter_time=8.636e-05, forward_time=0.432, loss_ctc=54.615, loss_att=55.664, acc=0.742, loss=55.350, backward_time=0.428, grad_norm=48.483, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.032e-04, train_time=1.504 +[gpub058:0/16] 2024-02-03 23:15:25,753 (trainer:737) INFO: 25epoch:train:3401-3500batch: 
iter_time=9.265e-05, forward_time=0.290, loss_ctc=50.346, loss_att=46.536, acc=0.753, loss=47.679, backward_time=0.401, grad_norm=38.825, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.032e-04, train_time=1.652 +[gpub058:0/16] 2024-02-03 23:18:02,665 (trainer:737) INFO: 25epoch:train:3501-3600batch: iter_time=8.805e-05, forward_time=0.294, loss_ctc=50.634, loss_att=45.793, acc=0.756, loss=47.246, backward_time=0.410, grad_norm=37.836, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.031e-04, train_time=1.569 +[gpub058:0/16] 2024-02-03 23:20:57,167 (trainer:737) INFO: 25epoch:train:3601-3700batch: iter_time=0.001, forward_time=0.463, loss_ctc=52.819, loss_att=55.786, acc=0.746, loss=54.896, backward_time=0.427, grad_norm=37.634, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.031e-04, train_time=1.744 +[gpub058:0/16] 2024-02-03 23:22:28,993 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub058:0/16] 2024-02-03 23:22:48,185 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-03 23:22:51,922 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-03 23:22:51,922 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub058:0/16] 2024-02-03 23:22:51,926 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-03 23:29:08,842 
(trainer:737) INFO: 25epoch:train:3701-3800batch: iter_time=3.249, forward_time=0.296, loss_ctc=45.081, loss_att=51.101, acc=0.738, loss=49.295, backward_time=0.403, grad_norm=37.976, clip=100.000, loss_scale=7.373e+33, optim_step_time=0.092, optim0_lr0=2.031e-04, train_time=4.917 +[gpub058:0/16] 2024-02-03 23:31:36,483 (trainer:737) INFO: 25epoch:train:3801-3900batch: iter_time=8.313e-05, forward_time=0.292, loss_ctc=43.068, loss_att=38.001, acc=0.772, loss=39.521, backward_time=0.411, grad_norm=33.229, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.031e-04, train_time=1.476 +[gpub058:0/16] 2024-02-03 23:34:17,969 (trainer:737) INFO: 25epoch:train:3901-4000batch: iter_time=5.559e-04, forward_time=0.392, loss_ctc=46.819, loss_att=45.037, acc=0.760, loss=45.571, backward_time=0.415, grad_norm=36.330, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=2.030e-04, train_time=1.614 +[gpub058:0/16] 2024-02-03 23:36:59,738 (trainer:737) INFO: 25epoch:train:4001-4100batch: iter_time=8.345e-05, forward_time=0.290, loss_ctc=48.956, loss_att=45.135, acc=0.760, loss=46.281, backward_time=0.405, grad_norm=34.919, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.030e-04, train_time=1.617 +[gpub058:0/16] 2024-02-03 23:39:34,894 (trainer:737) INFO: 25epoch:train:4101-4200batch: iter_time=8.007e-05, forward_time=0.289, loss_ctc=47.812, loss_att=46.503, acc=0.747, loss=46.895, backward_time=0.402, grad_norm=34.626, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.030e-04, train_time=1.552 +[gpub058:0/16] 2024-02-03 23:42:09,646 (trainer:737) INFO: 25epoch:train:4201-4300batch: iter_time=7.946e-05, forward_time=0.433, loss_ctc=48.173, loss_att=46.242, acc=0.749, loss=46.821, backward_time=0.440, grad_norm=37.249, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.029e-04, train_time=1.547 +[gpub058:0/16] 2024-02-03 23:44:46,537 (trainer:737) INFO: 
25epoch:train:4301-4400batch: iter_time=8.729e-05, forward_time=0.291, loss_ctc=49.977, loss_att=46.326, acc=0.746, loss=47.421, backward_time=0.403, grad_norm=37.772, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.029e-04, train_time=1.569 +[gpub058:0/16] 2024-02-03 23:47:13,234 (trainer:737) INFO: 25epoch:train:4401-4500batch: iter_time=8.448e-05, forward_time=0.291, loss_ctc=47.433, loss_att=49.874, acc=0.744, loss=49.142, backward_time=0.406, grad_norm=36.394, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.029e-04, train_time=1.466 +[gpub058:0/16] 2024-02-03 23:49:46,517 (trainer:737) INFO: 25epoch:train:4501-4600batch: iter_time=8.604e-05, forward_time=0.292, loss_ctc=57.529, loss_att=52.281, acc=0.746, loss=53.855, backward_time=0.411, grad_norm=46.724, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.029e-04, train_time=1.534 +[gpub058:0/16] 2024-02-03 23:52:49,678 (trainer:737) INFO: 25epoch:train:4601-4700batch: iter_time=9.784e-05, forward_time=0.433, loss_ctc=46.057, loss_att=50.306, acc=0.750, loss=49.032, backward_time=0.427, grad_norm=36.046, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.028e-04, train_time=1.831 +[gpub058:0/16] 2024-02-03 23:55:21,666 (trainer:737) INFO: 25epoch:train:4701-4800batch: iter_time=8.503e-05, forward_time=0.289, loss_ctc=52.508, loss_att=45.744, acc=0.750, loss=47.773, backward_time=0.402, grad_norm=41.175, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.028e-04, train_time=1.519 +[gpub058:0/16] 2024-02-03 23:57:45,235 (trainer:737) INFO: 25epoch:train:4801-4900batch: iter_time=8.599e-05, forward_time=0.300, loss_ctc=54.924, loss_att=54.285, acc=0.757, loss=54.476, backward_time=0.410, grad_norm=35.596, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.028e-04, train_time=1.436 +[gpub058:0/16] 2024-02-04 00:00:52,031 (trainer:737) INFO: 25epoch:train:4901-5000batch: 
iter_time=8.174e-05, forward_time=0.371, loss_ctc=47.378, loss_att=49.485, acc=0.749, loss=48.853, backward_time=0.453, grad_norm=36.264, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.028e-04, train_time=1.868 +[gpub058:0/16] 2024-02-04 00:01:12,059 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub058:0/16] 2024-02-04 00:01:31,442 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 00:01:35,088 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 00:01:35,088 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub058:0/16] 2024-02-04 00:01:35,091 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 00:08:40,920 (trainer:737) INFO: 25epoch:train:5001-5100batch: iter_time=3.294, forward_time=0.304, loss_ctc=43.247, loss_att=50.542, acc=0.736, loss=48.353, backward_time=0.407, grad_norm=37.062, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.027e-04, train_time=4.688 +[gpub058:0/16] 2024-02-04 00:11:23,981 (trainer:737) INFO: 25epoch:train:5101-5200batch: iter_time=8.485e-05, forward_time=0.403, loss_ctc=45.204, loss_att=38.848, acc=0.764, loss=40.755, backward_time=0.420, grad_norm=34.790, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=2.027e-04, train_time=1.631 +[gpub058:0/16] 2024-02-04 00:14:09,468 
(trainer:737) INFO: 25epoch:train:5201-5300batch: iter_time=8.556e-05, forward_time=0.289, loss_ctc=45.281, loss_att=43.018, acc=0.761, loss=43.697, backward_time=0.409, grad_norm=33.669, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.027e-04, train_time=1.652 +[gpub058:0/16] 2024-02-04 00:17:11,367 (trainer:737) INFO: 25epoch:train:5301-5400batch: iter_time=9.014e-05, forward_time=0.474, loss_ctc=55.747, loss_att=53.700, acc=0.730, loss=54.314, backward_time=0.433, grad_norm=42.338, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.026e-04, train_time=1.821 +[gpub058:0/16] 2024-02-04 00:19:53,779 (trainer:737) INFO: 25epoch:train:5401-5500batch: iter_time=8.717e-05, forward_time=0.446, loss_ctc=44.481, loss_att=39.981, acc=0.744, loss=41.331, backward_time=0.441, grad_norm=38.311, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.026e-04, train_time=1.624 +[gpub058:0/16] 2024-02-04 00:22:57,522 (trainer:737) INFO: 25epoch:train:5501-5600batch: iter_time=9.114e-05, forward_time=0.288, loss_ctc=40.555, loss_att=42.008, acc=0.746, loss=41.572, backward_time=0.398, grad_norm=35.019, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.026e-04, train_time=1.837 +[gpub058:0/16] 2024-02-04 00:25:35,258 (trainer:737) INFO: 25epoch:train:5601-5700batch: iter_time=8.599e-05, forward_time=0.450, loss_ctc=50.088, loss_att=51.315, acc=0.734, loss=50.947, backward_time=0.450, grad_norm=38.349, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.026e-04, train_time=1.577 +[gpub058:0/16] 2024-02-04 00:28:33,326 (trainer:737) INFO: 25epoch:train:5701-5800batch: iter_time=9.910e-05, forward_time=0.337, loss_ctc=53.424, loss_att=52.511, acc=0.739, loss=52.785, backward_time=0.463, grad_norm=38.581, clip=100.000, loss_scale=1.475e+34, optim_step_time=0.096, optim0_lr0=2.025e-04, train_time=1.780 +[gpub058:0/16] 2024-02-04 00:30:23,519 (trainer:668) WARNING: The grad norm is 
nan. Skipping updating the model. +[gpub058:0/16] 2024-02-04 00:30:58,182 (trainer:737) INFO: 25epoch:train:5801-5900batch: iter_time=9.467e-05, forward_time=0.290, loss_ctc=53.208, loss_att=55.206, acc=0.729, loss=54.607, backward_time=0.404, grad_norm=48.406, clip=100.000, loss_scale=1.804e+34, optim_step_time=0.092, optim0_lr0=2.025e-04, train_time=1.448 +[gpub058:0/16] 2024-02-04 00:34:19,848 (trainer:737) INFO: 25epoch:train:5901-6000batch: iter_time=9.350e-05, forward_time=0.425, loss_ctc=50.061, loss_att=46.996, acc=0.743, loss=47.916, backward_time=0.428, grad_norm=39.272, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.025e-04, train_time=2.016 +[gpub058:0/16] 2024-02-04 00:37:07,521 (trainer:737) INFO: 25epoch:train:6001-6100batch: iter_time=8.892e-05, forward_time=0.343, loss_ctc=50.687, loss_att=46.328, acc=0.746, loss=47.636, backward_time=0.474, grad_norm=38.316, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.024e-04, train_time=1.677 +[gpub058:0/16] 2024-02-04 00:39:56,004 (trainer:737) INFO: 25epoch:train:6101-6200batch: iter_time=9.071e-05, forward_time=0.291, loss_ctc=52.737, loss_att=55.782, acc=0.742, loss=54.869, backward_time=0.402, grad_norm=37.869, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.024e-04, train_time=1.685 +[gpub058:0/16] 2024-02-04 00:42:18,670 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub058:0/16] 2024-02-04 00:42:38,030 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 00:42:41,638 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 00:42:41,638 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub058:0/16] 2024-02-04 00:42:41,641 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 00:52:24,382 (trainer:737) INFO: 25epoch:train:6201-6300batch: iter_time=3.341, forward_time=0.401, loss_ctc=44.648, loss_att=51.658, acc=0.732, loss=49.555, backward_time=0.423, grad_norm=35.982, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=2.024e-04, train_time=7.484 +[gpub058:0/16] 2024-02-04 00:55:13,953 (trainer:737) INFO: 25epoch:train:6301-6400batch: iter_time=8.041e-05, forward_time=0.364, loss_ctc=42.921, loss_att=38.179, acc=0.769, loss=39.602, backward_time=0.451, grad_norm=33.331, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.024e-04, train_time=1.695 +[gpub058:0/16] 2024-02-04 00:57:44,321 (trainer:737) INFO: 25epoch:train:6401-6500batch: iter_time=8.447e-05, forward_time=0.290, loss_ctc=46.946, loss_att=43.616, acc=0.756, loss=44.615, backward_time=0.404, grad_norm=35.665, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.023e-04, train_time=1.503 +[gpub058:0/16] 2024-02-04 01:00:19,699 (trainer:737) 
INFO: 25epoch:train:6501-6600batch: iter_time=9.264e-05, forward_time=0.426, loss_ctc=48.533, loss_att=45.690, acc=0.754, loss=46.543, backward_time=0.428, grad_norm=36.136, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.023e-04, train_time=1.554 +[gpub058:0/16] 2024-02-04 01:03:06,349 (trainer:737) INFO: 25epoch:train:6601-6700batch: iter_time=8.512e-05, forward_time=0.291, loss_ctc=47.671, loss_att=43.318, acc=0.743, loss=44.624, backward_time=0.402, grad_norm=35.142, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.023e-04, train_time=1.666 +[gpub058:0/16] 2024-02-04 01:05:29,536 (trainer:737) INFO: 25epoch:train:6701-6800batch: iter_time=1.019e-04, forward_time=0.291, loss_ctc=47.113, loss_att=46.771, acc=0.737, loss=46.874, backward_time=0.401, grad_norm=37.753, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.023e-04, train_time=1.432 +[gpub058:0/16] 2024-02-04 01:08:14,770 (trainer:737) INFO: 25epoch:train:6801-6900batch: iter_time=9.136e-05, forward_time=0.374, loss_ctc=49.495, loss_att=45.599, acc=0.751, loss=46.768, backward_time=0.480, grad_norm=37.412, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.022e-04, train_time=1.652 +[gpub058:0/16] 2024-02-04 01:11:05,362 (trainer:737) INFO: 25epoch:train:6901-7000batch: iter_time=8.780e-05, forward_time=0.290, loss_ctc=47.404, loss_att=50.071, acc=0.736, loss=49.271, backward_time=0.406, grad_norm=35.468, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.022e-04, train_time=1.706 +[gpub058:0/16] 2024-02-04 01:13:51,844 (trainer:737) INFO: 25epoch:train:7001-7100batch: iter_time=8.197e-05, forward_time=0.384, loss_ctc=55.934, loss_att=50.768, acc=0.737, loss=52.318, backward_time=0.478, grad_norm=47.547, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.022e-04, train_time=1.664 +[gpub058:0/16] 2024-02-04 01:16:25,696 (trainer:737) INFO: 25epoch:train:7101-7200batch: 
iter_time=9.088e-05, forward_time=0.291, loss_ctc=45.747, loss_att=49.255, acc=0.737, loss=48.202, backward_time=0.402, grad_norm=36.535, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.021e-04, train_time=1.539 +[gpub058:0/16] 2024-02-04 01:19:37,860 (trainer:737) INFO: 25epoch:train:7201-7300batch: iter_time=5.639e-04, forward_time=0.402, loss_ctc=51.918, loss_att=45.439, acc=0.743, loss=47.383, backward_time=0.512, grad_norm=42.785, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.021e-04, train_time=1.921 +[gpub058:0/16] 2024-02-04 01:22:07,952 (trainer:737) INFO: 25epoch:train:7301-7400batch: iter_time=8.648e-05, forward_time=0.291, loss_ctc=54.540, loss_att=53.874, acc=0.752, loss=54.074, backward_time=0.407, grad_norm=34.656, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.021e-04, train_time=1.500 +[gpub058:0/16] 2024-02-04 01:24:43,967 (trainer:737) INFO: 25epoch:train:7401-7500batch: iter_time=8.045e-05, forward_time=0.294, loss_ctc=46.710, loss_att=49.515, acc=0.743, loss=48.674, backward_time=0.427, grad_norm=35.516, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.021e-04, train_time=1.559 +[gpub058:0/16] 2024-02-04 01:25:03,995 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub058:0/16] 2024-02-04 01:25:22,855 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 01:25:26,709 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 01:25:26,709 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub058:0/16] 2024-02-04 01:25:26,712 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 01:31:47,054 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-04 01:33:14,138 (trainer:737) INFO: 25epoch:train:7501-7600batch: iter_time=3.644, forward_time=0.358, loss_ctc=42.862, loss_att=49.095, acc=0.745, loss=47.225, backward_time=0.413, grad_norm=35.315, clip=100.000, loss_scale=7.133e+33, optim_step_time=0.107, optim0_lr0=2.020e-04, train_time=5.102 +[gpub058:0/16] 2024-02-04 01:35:38,916 (trainer:737) INFO: 25epoch:train:7601-7700batch: iter_time=8.324e-05, forward_time=0.290, loss_ctc=45.113, loss_att=37.576, acc=0.777, loss=39.837, backward_time=0.403, grad_norm=33.411, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.020e-04, train_time=1.448 +[gpub058:0/16] 2024-02-04 01:38:19,277 (trainer:737) INFO: 25epoch:train:7701-7800batch: iter_time=8.972e-05, forward_time=0.378, loss_ctc=45.152, loss_att=43.746, acc=0.767, loss=44.168, backward_time=0.440, grad_norm=34.132, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.020e-04, train_time=1.603 +[gpub058:0/16] 2024-02-04 01:40:52,138 (trainer:737) INFO: 25epoch:train:7801-7900batch: iter_time=9.209e-05, forward_time=0.292, loss_ctc=54.847, loss_att=55.980, acc=0.736, loss=55.640, backward_time=0.410, grad_norm=40.325, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.020e-04, train_time=1.528 +[gpub058:0/16] 2024-02-04 01:43:27,240 (trainer:737) INFO: 25epoch:train:7901-8000batch: iter_time=8.981e-05, forward_time=0.290, loss_ctc=44.776, loss_att=39.579, acc=0.753, loss=41.138, backward_time=0.405, grad_norm=34.153, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.019e-04, train_time=1.551 +[gpub058:0/16] 2024-02-04 01:46:01,178 (trainer:737) INFO: 25epoch:train:8001-8100batch: iter_time=9.604e-04, forward_time=0.296, loss_ctc=40.671, loss_att=41.152, acc=0.756, loss=41.008, backward_time=0.405, grad_norm=34.425, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.019e-04, train_time=1.539 +[gpub058:0/16] 2024-02-04 01:48:56,033 
(trainer:737) INFO: 25epoch:train:8101-8200batch: iter_time=8.908e-05, forward_time=0.415, loss_ctc=48.977, loss_att=50.277, acc=0.742, loss=49.887, backward_time=0.444, grad_norm=37.934, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.019e-04, train_time=1.749 +[gpub058:0/16] 2024-02-04 01:49:48,732 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-04 01:51:21,003 (trainer:737) INFO: 25epoch:train:8201-8300batch: iter_time=8.263e-05, forward_time=0.292, loss_ctc=53.186, loss_att=52.511, acc=0.745, loss=52.714, backward_time=0.409, grad_norm=39.744, clip=100.000, loss_scale=3.593e+33, optim_step_time=0.095, optim0_lr0=2.018e-04, train_time=1.449 +[gpub058:0/16] 2024-02-04 01:53:55,040 (trainer:737) INFO: 25epoch:train:8301-8400batch: iter_time=0.002, forward_time=0.292, loss_ctc=52.883, loss_att=55.854, acc=0.748, loss=54.963, backward_time=0.405, grad_norm=45.913, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.018e-04, train_time=1.540 +[gpub058:0/16] 2024-02-04 01:56:53,696 (trainer:737) INFO: 25epoch:train:8401-8500batch: iter_time=2.168e-04, forward_time=0.437, loss_ctc=49.695, loss_att=46.202, acc=0.756, loss=47.250, backward_time=0.466, grad_norm=38.197, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.018e-04, train_time=1.786 +[gpub058:0/16] 2024-02-04 01:59:39,085 (trainer:737) INFO: 25epoch:train:8501-8600batch: iter_time=8.705e-05, forward_time=0.296, loss_ctc=49.595, loss_att=45.701, acc=0.757, loss=46.869, backward_time=0.406, grad_norm=36.404, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.018e-04, train_time=1.654 +[gpub058:0/16] 2024-02-04 02:02:05,680 (trainer:737) INFO: 25epoch:train:8601-8700batch: iter_time=8.068e-05, forward_time=0.293, loss_ctc=52.505, loss_att=55.807, acc=0.748, loss=54.816, backward_time=0.405, grad_norm=37.867, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, 
optim0_lr0=2.017e-04, train_time=1.466 +[gpub058:0/16] 2024-02-04 02:03:42,011 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub058:0/16] 2024-02-04 02:04:01,274 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 02:04:05,163 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 02:04:05,163 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub058:0/16] 2024-02-04 02:04:05,167 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 02:10:26,661 (trainer:737) INFO: 25epoch:train:8701-8800batch: iter_time=3.395, forward_time=0.369, loss_ctc=44.650, loss_att=52.088, acc=0.732, loss=49.856, backward_time=0.413, grad_norm=39.555, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.017e-04, train_time=5.010 +[gpub058:0/16] 2024-02-04 02:12:50,157 (trainer:737) INFO: 25epoch:train:8801-8900batch: iter_time=0.003, forward_time=0.290, loss_ctc=42.626, loss_att=38.136, acc=0.771, loss=39.483, backward_time=0.400, grad_norm=33.264, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.017e-04, train_time=1.435 +[gpub058:0/16] 2024-02-04 02:15:26,554 (trainer:737) INFO: 25epoch:train:8901-9000batch: iter_time=8.488e-05, forward_time=0.289, loss_ctc=46.888, loss_att=43.580, acc=0.758, loss=44.572, backward_time=0.402, grad_norm=36.416, clip=100.000, 
loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.017e-04, train_time=1.563 +[gpub058:0/16] 2024-02-04 02:18:09,602 (trainer:737) INFO: 25epoch:train:9001-9100batch: iter_time=4.219e-04, forward_time=0.359, loss_ctc=47.971, loss_att=45.253, acc=0.755, loss=46.068, backward_time=0.487, grad_norm=36.502, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.016e-04, train_time=1.629 +[gpub058:0/16] 2024-02-04 02:20:51,130 (trainer:737) INFO: 25epoch:train:9101-9200batch: iter_time=8.687e-05, forward_time=0.288, loss_ctc=46.986, loss_att=44.255, acc=0.743, loss=45.074, backward_time=0.401, grad_norm=39.269, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.016e-04, train_time=1.616 +[gpub058:0/16] 2024-02-04 02:23:17,931 (trainer:737) INFO: 25epoch:train:9201-9300batch: iter_time=8.649e-05, forward_time=0.291, loss_ctc=46.953, loss_att=46.475, acc=0.739, loss=46.618, backward_time=0.408, grad_norm=36.423, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.016e-04, train_time=1.468 +[gpub058:0/16] 2024-02-04 02:25:48,812 (trainer:737) INFO: 25epoch:train:9301-9400batch: iter_time=5.314e-04, forward_time=0.289, loss_ctc=49.670, loss_att=46.439, acc=0.747, loss=47.408, backward_time=0.402, grad_norm=40.016, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.015e-04, train_time=1.508 +[gpub058:0/16] 2024-02-04 02:28:53,087 (trainer:737) INFO: 25epoch:train:9401-9500batch: iter_time=8.984e-05, forward_time=0.385, loss_ctc=47.181, loss_att=50.822, acc=0.733, loss=49.730, backward_time=0.473, grad_norm=36.901, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.015e-04, train_time=1.841 +[gpub058:0/16] 2024-02-04 02:31:18,018 (trainer:737) INFO: 25epoch:train:9501-9600batch: iter_time=9.044e-05, forward_time=0.291, loss_ctc=55.045, loss_att=51.207, acc=0.736, loss=52.359, backward_time=0.405, grad_norm=47.997, clip=100.000, loss_scale=2.596e+33, 
optim_step_time=0.097, optim0_lr0=2.015e-04, train_time=1.450 +[gpub058:0/16] 2024-02-04 02:32:45,108 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-04 02:34:05,746 (trainer:737) INFO: 25epoch:train:9601-9700batch: iter_time=9.115e-05, forward_time=0.289, loss_ctc=45.674, loss_att=49.334, acc=0.738, loss=48.236, backward_time=0.403, grad_norm=36.640, clip=100.000, loss_scale=1.875e+33, optim_step_time=0.093, optim0_lr0=2.015e-04, train_time=1.677 +[gpub058:0/16] 2024-02-04 02:36:56,967 (trainer:737) INFO: 25epoch:train:9701-9800batch: iter_time=2.379e-04, forward_time=0.386, loss_ctc=51.926, loss_att=45.408, acc=0.745, loss=47.363, backward_time=0.453, grad_norm=41.969, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.100, optim0_lr0=2.014e-04, train_time=1.712 +[gpub058:0/16] 2024-02-04 02:39:54,546 (trainer:737) INFO: 25epoch:train:9801-9900batch: iter_time=9.081e-05, forward_time=0.300, loss_ctc=53.811, loss_att=53.978, acc=0.754, loss=53.928, backward_time=0.404, grad_norm=37.660, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.014e-04, train_time=1.774 +[gpub058:0/16] 2024-02-04 02:42:08,649 (trainer:737) INFO: 25epoch:train:9901-10000batch: iter_time=8.299e-05, forward_time=0.290, loss_ctc=46.271, loss_att=49.712, acc=0.744, loss=48.679, backward_time=0.404, grad_norm=36.443, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.014e-04, train_time=1.343 +[gpub058:0/16] 2024-02-04 02:42:28,677 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub058:0/16] 2024-02-04 02:42:48,730 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 02:42:52,661 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 02:42:52,661 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub058:0/16] 2024-02-04 02:42:52,665 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 02:50:42,165 (trainer:737) INFO: 25epoch:train:10001-10100batch: iter_time=3.471, forward_time=0.363, loss_ctc=42.461, loss_att=47.922, acc=0.744, loss=46.283, backward_time=0.512, grad_norm=36.472, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.098, optim0_lr0=2.014e-04, train_time=5.135 +[gpub058:0/16] 2024-02-04 02:53:09,032 (trainer:737) INFO: 25epoch:train:10101-10200batch: iter_time=8.167e-05, forward_time=0.290, loss_ctc=44.969, loss_att=37.719, acc=0.765, loss=39.894, backward_time=0.401, grad_norm=33.526, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.013e-04, train_time=1.469 +[gpub058:0/16] 2024-02-04 02:56:09,177 (trainer:737) INFO: 25epoch:train:10201-10300batch: iter_time=8.844e-05, forward_time=0.291, loss_ctc=44.632, loss_att=41.987, acc=0.763, loss=42.780, backward_time=0.402, grad_norm=35.537, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.104, optim0_lr0=2.013e-04, train_time=1.798 +[gpub058:0/16] 2024-02-04 02:58:39,646 (trainer:737) 
INFO: 25epoch:train:10301-10400batch: iter_time=8.311e-05, forward_time=0.289, loss_ctc=55.075, loss_att=53.593, acc=0.731, loss=54.037, backward_time=0.403, grad_norm=40.886, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.013e-04, train_time=1.508 +[gpub058:0/16] 2024-02-04 03:01:39,507 (trainer:737) INFO: 25epoch:train:10401-10500batch: iter_time=8.766e-05, forward_time=0.408, loss_ctc=43.474, loss_att=38.642, acc=0.747, loss=40.092, backward_time=0.443, grad_norm=33.309, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.096, optim0_lr0=2.012e-04, train_time=1.798 +[gpub058:0/16] 2024-02-04 03:04:19,286 (trainer:737) INFO: 25epoch:train:10501-10600batch: iter_time=8.774e-05, forward_time=0.296, loss_ctc=40.552, loss_att=41.128, acc=0.750, loss=40.955, backward_time=0.404, grad_norm=36.966, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.095, optim0_lr0=2.012e-04, train_time=1.597 +[gpub058:0/16] 2024-02-04 03:07:03,554 (trainer:737) INFO: 25epoch:train:10601-10700batch: iter_time=2.616e-04, forward_time=0.299, loss_ctc=49.253, loss_att=49.512, acc=0.739, loss=49.434, backward_time=0.406, grad_norm=38.573, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.012e-04, train_time=1.642 +[gpub058:0/16] 2024-02-04 03:09:59,044 (trainer:737) INFO: 25epoch:train:10701-10800batch: iter_time=9.297e-05, forward_time=0.377, loss_ctc=53.208, loss_att=52.126, acc=0.741, loss=52.451, backward_time=0.453, grad_norm=60.945, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.101, optim0_lr0=2.012e-04, train_time=1.756 +[gpub058:0/16] 2024-02-04 03:12:49,662 (trainer:737) INFO: 25epoch:train:10801-10900batch: iter_time=9.329e-05, forward_time=0.297, loss_ctc=53.605, loss_att=53.791, acc=0.735, loss=53.735, backward_time=0.404, grad_norm=46.151, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.094, optim0_lr0=2.011e-04, train_time=1.706 +[gpub058:0/16] 2024-02-04 03:15:24,576 (trainer:737) INFO: 
25epoch:train:10901-11000batch: iter_time=8.863e-05, forward_time=0.290, loss_ctc=49.323, loss_att=45.680, acc=0.747, loss=46.773, backward_time=0.403, grad_norm=37.743, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.098, optim0_lr0=2.011e-04, train_time=1.549 +[gpub058:0/16] 2024-02-04 03:18:04,464 (trainer:737) INFO: 25epoch:train:11001-11100batch: iter_time=0.001, forward_time=0.291, loss_ctc=49.630, loss_att=45.911, acc=0.747, loss=47.026, backward_time=0.407, grad_norm=37.319, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.011e-04, train_time=1.597 +[gpub058:0/16] 2024-02-04 03:20:56,716 (trainer:737) INFO: 25epoch:train:11101-11200batch: iter_time=8.577e-05, forward_time=0.386, loss_ctc=52.236, loss_att=55.349, acc=0.744, loss=54.415, backward_time=0.455, grad_norm=37.527, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.096, optim0_lr0=2.011e-04, train_time=1.724 +[gpub058:0/16] 2024-02-04 03:22:22,748 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub058:0/16] 2024-02-04 03:22:42,016 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 03:22:45,952 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 03:22:45,952 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub058:0/16] 2024-02-04 03:22:45,955 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 03:28:40,139 (trainer:737) INFO: 25epoch:train:11201-11300batch: iter_time=3.174, forward_time=0.291, loss_ctc=43.919, loss_att=52.114, acc=0.735, loss=49.656, backward_time=0.406, grad_norm=35.901, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.010e-04, train_time=4.634 +[gpub058:0/16] 2024-02-04 03:31:17,515 (trainer:737) INFO: 25epoch:train:11301-11400batch: iter_time=7.534e-05, forward_time=0.291, loss_ctc=42.542, loss_att=38.426, acc=0.775, loss=39.661, backward_time=0.401, grad_norm=31.673, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.010e-04, train_time=1.574 +[gpub058:0/16] 2024-02-04 03:34:09,926 (trainer:737) INFO: 25epoch:train:11401-11500batch: iter_time=8.641e-05, forward_time=0.407, loss_ctc=46.179, loss_att=46.197, acc=0.761, loss=46.191, backward_time=0.453, grad_norm=37.708, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.095, optim0_lr0=2.010e-04, train_time=1.723 +[gpub058:0/16] 2024-02-04 03:37:02,004 
(trainer:737) INFO: 25epoch:train:11501-11600batch: iter_time=9.001e-05, forward_time=0.295, loss_ctc=47.441, loss_att=45.054, acc=0.763, loss=45.770, backward_time=0.405, grad_norm=34.749, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.097, optim0_lr0=2.009e-04, train_time=1.721 +[gpub058:0/16] 2024-02-04 03:39:20,317 (trainer:737) INFO: 25epoch:train:11601-11700batch: iter_time=8.737e-05, forward_time=0.293, loss_ctc=46.918, loss_att=46.882, acc=0.748, loss=46.893, backward_time=0.405, grad_norm=34.923, clip=100.000, loss_scale=2.012e+33, optim_step_time=0.092, optim0_lr0=2.009e-04, train_time=1.383 +[gpub058:0/16] 2024-02-04 03:42:22,387 (trainer:737) INFO: 25epoch:train:11701-11800batch: iter_time=8.906e-05, forward_time=0.394, loss_ctc=46.702, loss_att=45.600, acc=0.751, loss=45.930, backward_time=0.465, grad_norm=36.550, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=2.009e-04, train_time=1.820 +[gpub058:0/16] 2024-02-04 03:45:12,389 (trainer:737) INFO: 25epoch:train:11801-11900batch: iter_time=8.892e-05, forward_time=0.293, loss_ctc=49.012, loss_att=46.495, acc=0.746, loss=47.250, backward_time=0.404, grad_norm=37.891, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.009e-04, train_time=1.700 +[gpub058:0/16] 2024-02-04 03:47:32,088 (trainer:737) INFO: 25epoch:train:11901-12000batch: iter_time=8.791e-05, forward_time=0.292, loss_ctc=46.519, loss_att=50.107, acc=0.746, loss=49.031, backward_time=0.406, grad_norm=35.931, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.008e-04, train_time=1.397 +[gpub058:0/16] 2024-02-04 03:50:42,176 (trainer:737) INFO: 25epoch:train:12001-12100batch: iter_time=8.250e-05, forward_time=0.430, loss_ctc=56.370, loss_att=51.072, acc=0.747, loss=52.661, backward_time=0.434, grad_norm=49.779, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.008e-04, train_time=1.899 +[gpub058:0/16] 2024-02-04 03:53:33,027 (trainer:737) INFO: 
25epoch:train:12101-12200batch: iter_time=9.490e-05, forward_time=0.290, loss_ctc=45.527, loss_att=50.700, acc=0.751, loss=49.148, backward_time=0.407, grad_norm=35.704, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.008e-04, train_time=1.710 +[gpub058:0/16] 2024-02-04 03:55:57,739 (trainer:737) INFO: 25epoch:train:12201-12300batch: iter_time=8.158e-05, forward_time=0.290, loss_ctc=51.840, loss_att=45.665, acc=0.753, loss=47.517, backward_time=0.404, grad_norm=50.803, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.008e-04, train_time=1.447 +[gpub058:0/16] 2024-02-04 03:59:08,325 (trainer:737) INFO: 25epoch:train:12301-12400batch: iter_time=8.189e-05, forward_time=0.400, loss_ctc=53.573, loss_att=54.281, acc=0.760, loss=54.069, backward_time=0.448, grad_norm=34.777, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.007e-04, train_time=1.905 +[gpub058:0/16] 2024-02-04 04:01:50,143 (trainer:737) INFO: 25epoch:train:12401-12500batch: iter_time=2.251e-04, forward_time=0.292, loss_ctc=46.180, loss_att=49.203, acc=0.750, loss=48.296, backward_time=0.404, grad_norm=35.045, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.007e-04, train_time=1.618 +[gpub058:0/16] 2024-02-04 04:02:10,737 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub058:0/16] 2024-02-04 04:02:30,372 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 04:02:34,497 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 04:02:34,497 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-04 04:02:34,500 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 04:10:09,902 (trainer:737) INFO: 25epoch:train:12501-12600batch: iter_time=3.312, forward_time=0.289, loss_ctc=42.471, loss_att=48.995, acc=0.741, loss=47.038, backward_time=0.408, grad_norm=39.673, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.007e-04, train_time=4.998 +[gpub058:0/16] 2024-02-04 04:13:12,029 (trainer:737) INFO: 25epoch:train:12601-12700batch: iter_time=8.166e-05, forward_time=0.410, loss_ctc=44.996, loss_att=37.940, acc=0.767, loss=40.057, backward_time=0.438, grad_norm=35.885, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.108, optim0_lr0=2.006e-04, train_time=1.820 +[gpub058:0/16] 2024-02-04 04:16:00,017 (trainer:737) INFO: 25epoch:train:12701-12800batch: iter_time=8.576e-05, forward_time=0.289, loss_ctc=44.469, loss_att=42.040, acc=0.765, loss=42.769, backward_time=0.403, grad_norm=33.951, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.006e-04, train_time=1.680 +[gpub058:0/16] 2024-02-04 04:18:42,306 (trainer:737) 
INFO: 25epoch:train:12801-12900batch: iter_time=3.521e-04, forward_time=0.295, loss_ctc=54.178, loss_att=52.577, acc=0.733, loss=53.057, backward_time=0.404, grad_norm=40.249, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.006e-04, train_time=1.623 +[gpub058:0/16] 2024-02-04 04:21:41,356 (trainer:737) INFO: 25epoch:train:12901-13000batch: iter_time=8.351e-05, forward_time=0.408, loss_ctc=43.584, loss_att=38.972, acc=0.748, loss=40.355, backward_time=0.459, grad_norm=33.946, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.006e-04, train_time=1.790 +[gpub058:0/16] 2024-02-04 04:24:28,200 (trainer:737) INFO: 25epoch:train:13001-13100batch: iter_time=8.476e-05, forward_time=0.292, loss_ctc=40.546, loss_att=41.349, acc=0.749, loss=41.108, backward_time=0.401, grad_norm=34.266, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.005e-04, train_time=1.668 +[gpub058:0/16] 2024-02-04 04:27:12,274 (trainer:737) INFO: 25epoch:train:13101-13200batch: iter_time=1.652e-04, forward_time=0.292, loss_ctc=48.635, loss_att=49.828, acc=0.737, loss=49.470, backward_time=0.405, grad_norm=37.072, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.005e-04, train_time=1.640 +[gpub058:0/16] 2024-02-04 04:30:27,035 (trainer:737) INFO: 25epoch:train:13201-13300batch: iter_time=8.346e-05, forward_time=0.425, loss_ctc=51.977, loss_att=51.507, acc=0.742, loss=51.648, backward_time=0.444, grad_norm=41.127, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.005e-04, train_time=1.948 +[gpub058:0/16] 2024-02-04 04:33:06,870 (trainer:737) INFO: 25epoch:train:13301-13400batch: iter_time=8.262e-05, forward_time=0.291, loss_ctc=53.262, loss_att=53.815, acc=0.735, loss=53.649, backward_time=0.403, grad_norm=47.055, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.005e-04, train_time=1.598 +[gpub058:0/16] 2024-02-04 04:36:08,247 (trainer:737) INFO: 
25epoch:train:13401-13500batch: iter_time=3.444e-04, forward_time=0.289, loss_ctc=49.580, loss_att=46.108, acc=0.745, loss=47.150, backward_time=0.401, grad_norm=37.640, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.004e-04, train_time=1.813 +[gpub058:0/16] 2024-02-04 04:38:58,926 (trainer:737) INFO: 25epoch:train:13501-13600batch: iter_time=8.852e-05, forward_time=0.396, loss_ctc=49.370, loss_att=45.434, acc=0.751, loss=46.615, backward_time=0.496, grad_norm=36.754, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.004e-04, train_time=1.707 +[gpub058:0/16] 2024-02-04 04:41:45,563 (trainer:737) INFO: 25epoch:train:13601-13700batch: iter_time=8.918e-05, forward_time=0.294, loss_ctc=52.203, loss_att=55.486, acc=0.744, loss=54.501, backward_time=0.403, grad_norm=37.336, clip=100.000, loss_scale=4.024e+33, optim_step_time=0.098, optim0_lr0=2.004e-04, train_time=1.665 +[gpub058:0/16] 2024-02-04 04:43:20,616 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub058:0/16] 2024-02-04 04:43:39,993 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 04:43:43,587 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 04:43:43,587 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-04 04:43:43,591 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 04:49:53,638 (trainer:737) INFO: 25epoch:train:13701-13800batch: iter_time=3.324, forward_time=0.379, loss_ctc=43.559, loss_att=50.528, acc=0.736, loss=48.437, backward_time=0.418, grad_norm=37.773, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.004e-04, train_time=4.881 +[gpub058:0/16] 2024-02-04 04:52:49,440 (trainer:737) INFO: 25epoch:train:13801-13900batch: iter_time=8.402e-05, forward_time=0.288, loss_ctc=42.249, loss_att=37.597, acc=0.773, loss=38.993, backward_time=0.399, grad_norm=33.472, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.003e-04, train_time=1.758 +[gpub058:0/16] 2024-02-04 04:55:27,128 (trainer:737) INFO: 25epoch:train:13901-14000batch: iter_time=7.968e-05, forward_time=0.295, loss_ctc=46.259, loss_att=42.874, acc=0.759, loss=43.890, backward_time=0.404, grad_norm=36.314, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.003e-04, train_time=1.577 +[gpub058:0/16] 2024-02-04 04:58:34,633 (trainer:737) 
INFO: 25epoch:train:14001-14100batch: iter_time=8.413e-05, forward_time=0.404, loss_ctc=47.800, loss_att=45.173, acc=0.757, loss=45.961, backward_time=0.464, grad_norm=35.610, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.003e-04, train_time=1.875 +[gpub058:0/16] 2024-02-04 05:01:23,672 (trainer:737) INFO: 25epoch:train:14101-14200batch: iter_time=1.222e-04, forward_time=0.292, loss_ctc=47.112, loss_att=42.719, acc=0.746, loss=44.037, backward_time=0.402, grad_norm=34.884, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.002e-04, train_time=1.690 +[gpub058:0/16] 2024-02-04 05:04:01,340 (trainer:737) INFO: 25epoch:train:14201-14300batch: iter_time=2.850e-04, forward_time=0.296, loss_ctc=46.855, loss_att=46.431, acc=0.740, loss=46.558, backward_time=0.408, grad_norm=39.896, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.002e-04, train_time=1.576 +[gpub058:0/16] 2024-02-04 05:07:38,357 (trainer:737) INFO: 25epoch:train:14301-14400batch: iter_time=9.645e-05, forward_time=0.378, loss_ctc=49.178, loss_att=45.541, acc=0.752, loss=46.633, backward_time=0.483, grad_norm=36.560, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.002e-04, train_time=2.169 +[gpub058:0/16] 2024-02-04 05:10:11,755 (trainer:737) INFO: 25epoch:train:14401-14500batch: iter_time=9.796e-05, forward_time=0.291, loss_ctc=46.641, loss_att=50.154, acc=0.737, loss=49.100, backward_time=0.403, grad_norm=36.071, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.002e-04, train_time=1.535 +[gpub058:0/16] 2024-02-04 05:12:57,592 (trainer:737) INFO: 25epoch:train:14501-14600batch: iter_time=1.330e-04, forward_time=0.308, loss_ctc=56.767, loss_att=50.561, acc=0.739, loss=52.423, backward_time=0.404, grad_norm=47.550, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.001e-04, train_time=1.658 +[gpub058:0/16] 2024-02-04 05:16:29,655 (trainer:737) INFO: 
25epoch:train:14601-14700batch: iter_time=8.829e-05, forward_time=0.385, loss_ctc=45.288, loss_att=49.321, acc=0.736, loss=48.111, backward_time=0.427, grad_norm=36.488, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=2.001e-04, train_time=2.120 +[gpub058:0/16] 2024-02-04 05:19:09,652 (trainer:737) INFO: 25epoch:train:14701-14800batch: iter_time=8.022e-05, forward_time=0.292, loss_ctc=51.063, loss_att=44.862, acc=0.746, loss=46.722, backward_time=0.403, grad_norm=39.733, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.001e-04, train_time=1.600 +[gpub058:0/16] 2024-02-04 05:22:29,223 (trainer:737) INFO: 25epoch:train:14801-14900batch: iter_time=6.499e-04, forward_time=0.466, loss_ctc=54.240, loss_att=53.555, acc=0.754, loss=53.760, backward_time=0.424, grad_norm=36.372, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.001e-04, train_time=1.995 +[gpub058:0/16] 2024-02-04 05:25:25,168 (trainer:737) INFO: 25epoch:train:14901-15000batch: iter_time=8.168e-05, forward_time=0.292, loss_ctc=45.863, loss_att=49.066, acc=0.745, loss=48.105, backward_time=0.403, grad_norm=37.147, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.000e-04, train_time=1.759 +[gpub058:0/16] 2024-02-04 06:04:15,182 (trainer:343) INFO: 25epoch results: [train] iter_time=0.267, forward_time=0.339, loss_ctc=48.695, loss_att=47.619, acc=0.748, loss=47.942, backward_time=0.422, grad_norm=38.229, clip=100.000, loss_scale=6.095e+33, optim_step_time=0.096, optim0_lr0=2.021e-04, train_time=1.921, time=8 hours and 37.94 seconds, total_count=405000, gpu_max_cached_mem_GB=43.281, [valid] loss_ctc=38.945, cer_ctc=0.196, loss_att=39.437, acc=0.680, cer=0.315, wer=0.993, loss=39.289, time=38 minutes and 25.98 seconds, total_count=126117, gpu_max_cached_mem_GB=43.281 +[gpub058:0/16] 2024-02-04 06:04:25,602 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub058:0/16] 2024-02-04 
06:04:25,658 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/19epoch.pth, exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/20epoch.pth +[gpub058:0/16] 2024-02-04 06:04:25,658 (trainer:272) INFO: 26/45epoch started. Estimated time to finish: 6 days, 23 hours and 36 minutes +[gpub058:0/16] 2024-02-04 06:04:25,668 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub058:0/16] 2024-02-04 06:04:44,164 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 06:04:47,616 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 06:04:47,616 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub058:0/16] 2024-02-04 06:04:47,619 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 06:10:51,539 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-04 06:11:39,815 (trainer:737) INFO: 26epoch:train:1-100batch: iter_time=2.877, forward_time=0.399, loss_ctc=42.997, loss_att=39.840, acc=0.753, loss=40.787, backward_time=0.410, grad_norm=34.904, clip=100.000, loss_scale=4.274e+33, optim_step_time=0.093, optim0_lr0=2.000e-04, train_time=4.341 +[gpub058:0/16] 2024-02-04 06:14:07,999 (trainer:737) INFO: 26epoch:train:101-200batch: iter_time=8.280e-05, forward_time=0.289, loss_ctc=48.175, loss_att=44.470, acc=0.732, loss=45.582, backward_time=0.403, grad_norm=38.764, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.000e-04, train_time=1.482 +[gpub058:0/16] 2024-02-04 06:16:29,175 (trainer:737) INFO: 26epoch:train:201-300batch: iter_time=8.096e-05, forward_time=0.291, loss_ctc=45.944, loss_att=41.575, acc=0.750, loss=42.886, backward_time=0.404, grad_norm=37.171, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.000e-04, train_time=1.409 +[gpub058:0/16] 2024-02-04 06:19:25,824 (trainer:737) INFO: 26epoch:train:301-400batch: iter_time=8.780e-05, forward_time=0.403, loss_ctc=49.179, loss_att=54.813, acc=0.737, loss=53.122, backward_time=0.448, grad_norm=37.761, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.117, optim0_lr0=1.999e-04, train_time=1.768 +[gpub058:0/16] 2024-02-04 06:21:46,054 (trainer:737) INFO: 26epoch:train:401-500batch: iter_time=8.175e-05, forward_time=0.289, loss_ctc=48.180, loss_att=43.175, acc=0.752, loss=44.676, backward_time=0.403, grad_norm=36.593, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.999e-04, train_time=1.402 +[gpub058:0/16] 2024-02-04 06:24:18,569 (trainer:737) INFO: 26epoch:train:501-600batch: iter_time=8.643e-05, forward_time=0.289, loss_ctc=53.651, loss_att=44.773, acc=0.757, loss=47.436, backward_time=0.403, grad_norm=40.225, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.999e-04, train_time=1.525 +[gpub058:0/16] 2024-02-04 06:26:44,698 (trainer:737) INFO: 
26epoch:train:601-700batch: iter_time=7.967e-05, forward_time=0.320, loss_ctc=50.965, loss_att=54.554, acc=0.725, loss=53.478, backward_time=0.409, grad_norm=38.592, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.998e-04, train_time=1.460 +[gpub058:0/16] 2024-02-04 06:29:39,384 (trainer:737) INFO: 26epoch:train:701-800batch: iter_time=8.690e-05, forward_time=0.352, loss_ctc=45.157, loss_att=44.738, acc=0.731, loss=44.864, backward_time=0.449, grad_norm=36.758, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.998e-04, train_time=1.748 +[gpub058:0/16] 2024-02-04 06:32:00,304 (trainer:737) INFO: 26epoch:train:801-900batch: iter_time=9.442e-05, forward_time=0.288, loss_ctc=49.692, loss_att=42.318, acc=0.748, loss=44.531, backward_time=0.402, grad_norm=45.053, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.998e-04, train_time=1.409 +[gpub058:0/16] 2024-02-04 06:34:45,002 (trainer:737) INFO: 26epoch:train:901-1000batch: iter_time=9.040e-05, forward_time=0.302, loss_ctc=59.491, loss_att=59.083, acc=0.724, loss=59.205, backward_time=0.418, grad_norm=48.333, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.998e-04, train_time=1.645 +[gpub058:0/16] 2024-02-04 06:37:25,868 (trainer:737) INFO: 26epoch:train:1001-1100batch: iter_time=1.032e-04, forward_time=0.290, loss_ctc=53.671, loss_att=52.135, acc=0.722, loss=52.596, backward_time=0.440, grad_norm=48.378, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.997e-04, train_time=1.610 +[gpub058:0/16] 2024-02-04 06:40:23,497 (trainer:737) INFO: 26epoch:train:1101-1200batch: iter_time=9.966e-05, forward_time=0.403, loss_ctc=61.127, loss_att=63.294, acc=0.712, loss=62.644, backward_time=0.431, grad_norm=41.967, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.997e-04, train_time=1.776 +[gpub058:0/16] 2024-02-04 06:42:08,427 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub058:0/16] 2024-02-04 06:42:27,452 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 06:42:31,005 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 06:42:31,005 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub058:0/16] 2024-02-04 06:42:31,008 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 06:48:50,663 (trainer:737) INFO: 26epoch:train:1201-1300batch: iter_time=3.305, forward_time=0.288, loss_ctc=41.678, loss_att=44.027, acc=0.744, loss=43.322, backward_time=0.399, grad_norm=33.238, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.997e-04, train_time=5.071 +[gpub058:0/16] 2024-02-04 06:51:07,784 (trainer:737) INFO: 26epoch:train:1301-1400batch: iter_time=8.380e-05, forward_time=0.296, loss_ctc=46.864, loss_att=43.055, acc=0.751, loss=44.198, backward_time=0.406, grad_norm=35.907, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.997e-04, train_time=1.371 +[gpub058:0/16] 2024-02-04 06:54:13,739 (trainer:737) INFO: 26epoch:train:1401-1500batch: iter_time=8.285e-05, forward_time=0.423, loss_ctc=44.759, loss_att=41.968, acc=0.744, loss=42.805, backward_time=0.439, grad_norm=35.511, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.996e-04, train_time=1.860 +[gpub058:0/16] 2024-02-04 06:57:02,797 (trainer:737) INFO: 
26epoch:train:1501-1600batch: iter_time=8.266e-05, forward_time=0.292, loss_ctc=47.121, loss_att=44.181, acc=0.756, loss=45.063, backward_time=0.402, grad_norm=35.090, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.996e-04, train_time=1.690 +[gpub058:0/16] 2024-02-04 06:59:37,238 (trainer:737) INFO: 26epoch:train:1601-1700batch: iter_time=8.528e-05, forward_time=0.292, loss_ctc=46.141, loss_att=51.108, acc=0.742, loss=49.618, backward_time=0.413, grad_norm=37.077, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.996e-04, train_time=1.544 +[gpub058:0/16] 2024-02-04 07:02:14,661 (trainer:737) INFO: 26epoch:train:1701-1800batch: iter_time=7.974e-05, forward_time=0.304, loss_ctc=53.706, loss_att=45.265, acc=0.751, loss=47.798, backward_time=0.407, grad_norm=40.875, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.996e-04, train_time=1.574 +[gpub058:0/16] 2024-02-04 07:04:57,729 (trainer:737) INFO: 26epoch:train:1801-1900batch: iter_time=8.273e-05, forward_time=0.402, loss_ctc=50.855, loss_att=47.155, acc=0.752, loss=48.265, backward_time=0.413, grad_norm=38.611, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.995e-04, train_time=1.631 +[gpub058:0/16] 2024-02-04 07:07:51,696 (trainer:737) INFO: 26epoch:train:1901-2000batch: iter_time=8.613e-05, forward_time=0.288, loss_ctc=44.829, loss_att=48.337, acc=0.739, loss=47.284, backward_time=0.401, grad_norm=35.006, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.995e-04, train_time=1.739 +[gpub058:0/16] 2024-02-04 07:10:11,945 (trainer:737) INFO: 26epoch:train:2001-2100batch: iter_time=8.319e-05, forward_time=0.302, loss_ctc=46.037, loss_att=43.363, acc=0.736, loss=44.165, backward_time=0.406, grad_norm=39.127, clip=100.000, loss_scale=3.505e+33, optim_step_time=0.097, optim0_lr0=1.995e-04, train_time=1.402 +[gpub058:0/16] 2024-02-04 07:13:14,368 (trainer:737) INFO: 26epoch:train:2101-2200batch: 
iter_time=8.227e-05, forward_time=0.413, loss_ctc=52.216, loss_att=49.429, acc=0.739, loss=50.265, backward_time=0.438, grad_norm=43.141, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.994e-04, train_time=1.824 +[gpub058:0/16] 2024-02-04 07:15:52,216 (trainer:737) INFO: 26epoch:train:2201-2300batch: iter_time=8.653e-05, forward_time=0.287, loss_ctc=49.317, loss_att=47.519, acc=0.732, loss=48.058, backward_time=0.404, grad_norm=40.936, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.994e-04, train_time=1.579 +[gpub058:0/16] 2024-02-04 07:18:30,058 (trainer:737) INFO: 26epoch:train:2301-2400batch: iter_time=8.896e-05, forward_time=0.308, loss_ctc=58.260, loss_att=63.861, acc=0.711, loss=62.180, backward_time=0.410, grad_norm=46.176, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.994e-04, train_time=1.578 +[gpub058:0/16] 2024-02-04 07:21:09,530 (trainer:737) INFO: 26epoch:train:2401-2500batch: iter_time=8.091e-05, forward_time=0.373, loss_ctc=53.494, loss_att=55.686, acc=0.729, loss=55.029, backward_time=0.460, grad_norm=38.076, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.994e-04, train_time=1.594 +[gpub058:0/16] 2024-02-04 07:21:29,582 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub058:0/16] 2024-02-04 07:21:49,072 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 07:21:52,671 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 07:21:52,671 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-04 07:21:52,674 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 07:29:23,752 (trainer:737) INFO: 26epoch:train:2501-2600batch: iter_time=3.405, forward_time=0.297, loss_ctc=41.943, loss_att=38.699, acc=0.762, loss=39.672, backward_time=0.399, grad_norm=34.398, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.993e-04, train_time=4.942 +[gpub058:0/16] 2024-02-04 07:32:14,525 (trainer:737) INFO: 26epoch:train:2601-2700batch: iter_time=7.796e-05, forward_time=0.297, loss_ctc=46.636, loss_att=43.623, acc=0.736, loss=44.527, backward_time=0.409, grad_norm=39.112, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.993e-04, train_time=1.708 +[gpub058:0/16] 2024-02-04 07:35:27,338 (trainer:737) INFO: 26epoch:train:2701-2800batch: iter_time=8.196e-05, forward_time=0.362, loss_ctc=44.611, loss_att=41.213, acc=0.756, loss=42.232, backward_time=0.413, grad_norm=34.407, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.993e-04, train_time=1.928 +[gpub058:0/16] 2024-02-04 07:38:00,344 (trainer:737) INFO: 
26epoch:train:2801-2900batch: iter_time=8.086e-05, forward_time=0.369, loss_ctc=48.082, loss_att=54.261, acc=0.741, loss=52.408, backward_time=0.418, grad_norm=36.431, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.993e-04, train_time=1.529 +[gpub058:0/16] 2024-02-04 07:40:53,027 (trainer:737) INFO: 26epoch:train:2901-3000batch: iter_time=7.872e-05, forward_time=0.288, loss_ctc=47.037, loss_att=41.818, acc=0.757, loss=43.384, backward_time=0.401, grad_norm=35.115, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.992e-04, train_time=1.727 +[gpub058:0/16] 2024-02-04 07:43:51,703 (trainer:737) INFO: 26epoch:train:3001-3100batch: iter_time=8.057e-05, forward_time=0.294, loss_ctc=52.217, loss_att=43.780, acc=0.761, loss=46.311, backward_time=0.421, grad_norm=43.464, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.992e-04, train_time=1.787 +[gpub058:0/16] 2024-02-04 07:46:42,512 (trainer:737) INFO: 26epoch:train:3101-3200batch: iter_time=8.446e-05, forward_time=0.404, loss_ctc=48.840, loss_att=53.450, acc=0.728, loss=52.067, backward_time=0.469, grad_norm=39.953, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.992e-04, train_time=1.708 +[gpub058:0/16] 2024-02-04 07:49:36,470 (trainer:737) INFO: 26epoch:train:3201-3300batch: iter_time=8.117e-05, forward_time=0.288, loss_ctc=45.166, loss_att=44.739, acc=0.733, loss=44.867, backward_time=0.399, grad_norm=38.194, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.992e-04, train_time=1.739 +[gpub058:0/16] 2024-02-04 07:52:19,286 (trainer:737) INFO: 26epoch:train:3301-3400batch: iter_time=8.603e-05, forward_time=0.303, loss_ctc=47.176, loss_att=40.976, acc=0.754, loss=42.836, backward_time=0.405, grad_norm=40.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.991e-04, train_time=1.627 +[gpub058:0/16] 2024-02-04 07:54:55,093 (trainer:737) INFO: 26epoch:train:3401-3500batch: 
iter_time=8.124e-05, forward_time=0.369, loss_ctc=56.462, loss_att=58.065, acc=0.726, loss=57.584, backward_time=0.460, grad_norm=41.161, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.991e-04, train_time=1.559 +[gpub058:0/16] 2024-02-04 07:58:02,037 (trainer:737) INFO: 26epoch:train:3501-3600batch: iter_time=8.315e-05, forward_time=0.304, loss_ctc=51.082, loss_att=51.264, acc=0.726, loss=51.209, backward_time=0.402, grad_norm=45.637, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.991e-04, train_time=1.869 +[gpub058:0/16] 2024-02-04 08:00:24,581 (trainer:737) INFO: 26epoch:train:3601-3700batch: iter_time=8.136e-05, forward_time=0.304, loss_ctc=59.511, loss_att=62.028, acc=0.717, loss=61.273, backward_time=0.409, grad_norm=40.177, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.991e-04, train_time=1.425 +[gpub058:0/16] 2024-02-04 08:01:54,279 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub058:0/16] 2024-02-04 08:02:13,289 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 08:02:17,352 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 08:02:17,352 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub058:0/16] 2024-02-04 08:02:17,356 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 08:08:37,044 
(trainer:737) INFO: 26epoch:train:3701-3800batch: iter_time=3.390, forward_time=0.396, loss_ctc=40.288, loss_att=44.776, acc=0.751, loss=43.430, backward_time=0.427, grad_norm=33.584, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.990e-04, train_time=4.924 +[gpub058:0/16] 2024-02-04 08:10:51,186 (trainer:737) INFO: 26epoch:train:3801-3900batch: iter_time=8.191e-05, forward_time=0.290, loss_ctc=46.260, loss_att=45.089, acc=0.749, loss=45.440, backward_time=0.404, grad_norm=35.724, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.990e-04, train_time=1.341 +[gpub058:0/16] 2024-02-04 08:13:41,882 (trainer:737) INFO: 26epoch:train:3901-4000batch: iter_time=8.572e-05, forward_time=0.303, loss_ctc=43.398, loss_att=42.149, acc=0.752, loss=42.524, backward_time=0.407, grad_norm=34.421, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.990e-04, train_time=1.707 +[gpub058:0/16] 2024-02-04 08:16:22,845 (trainer:737) INFO: 26epoch:train:4001-4100batch: iter_time=9.220e-05, forward_time=0.389, loss_ctc=46.585, loss_att=44.853, acc=0.760, loss=45.372, backward_time=0.450, grad_norm=35.826, clip=100.000, loss_scale=7.010e+33, optim_step_time=0.096, optim0_lr0=1.989e-04, train_time=1.609 +[gpub058:0/16] 2024-02-04 08:18:54,381 (trainer:737) INFO: 26epoch:train:4101-4200batch: iter_time=9.151e-05, forward_time=0.290, loss_ctc=45.596, loss_att=51.141, acc=0.751, loss=49.478, backward_time=0.402, grad_norm=37.500, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.989e-04, train_time=1.515 +[gpub058:0/16] 2024-02-04 08:21:36,637 (trainer:737) INFO: 26epoch:train:4201-4300batch: iter_time=8.990e-05, forward_time=0.308, loss_ctc=52.332, loss_att=47.114, acc=0.752, loss=48.680, backward_time=0.408, grad_norm=39.984, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.989e-04, train_time=1.622 +[gpub058:0/16] 2024-02-04 08:24:16,578 (trainer:737) INFO: 
26epoch:train:4301-4400batch: iter_time=8.099e-04, forward_time=0.412, loss_ctc=49.317, loss_att=47.463, acc=0.755, loss=48.019, backward_time=0.436, grad_norm=38.438, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.989e-04, train_time=1.599 +[gpub058:0/16] 2024-02-04 08:25:58,764 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-04 08:26:50,806 (trainer:737) INFO: 26epoch:train:4401-4500batch: iter_time=8.880e-05, forward_time=0.298, loss_ctc=44.556, loss_att=48.497, acc=0.753, loss=47.315, backward_time=0.408, grad_norm=34.418, clip=100.000, loss_scale=8.969e+33, optim_step_time=0.095, optim0_lr0=1.988e-04, train_time=1.542 +[gpub058:0/16] 2024-02-04 08:29:30,356 (trainer:737) INFO: 26epoch:train:4501-4600batch: iter_time=8.681e-05, forward_time=0.356, loss_ctc=45.654, loss_att=46.034, acc=0.744, loss=45.920, backward_time=0.434, grad_norm=38.111, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.988e-04, train_time=1.596 +[gpub058:0/16] 2024-02-04 08:31:50,825 (trainer:737) INFO: 26epoch:train:4601-4700batch: iter_time=9.409e-05, forward_time=0.291, loss_ctc=50.666, loss_att=49.498, acc=0.744, loss=49.848, backward_time=0.405, grad_norm=41.012, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.988e-04, train_time=1.404 +[gpub058:0/16] 2024-02-04 08:34:09,086 (trainer:737) INFO: 26epoch:train:4701-4800batch: iter_time=3.820e-04, forward_time=0.298, loss_ctc=48.124, loss_att=49.244, acc=0.744, loss=48.908, backward_time=0.409, grad_norm=40.175, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.988e-04, train_time=1.381 +[gpub058:0/16] 2024-02-04 08:37:24,534 (trainer:737) INFO: 26epoch:train:4801-4900batch: iter_time=9.432e-05, forward_time=0.426, loss_ctc=56.738, loss_att=66.829, acc=0.709, loss=63.802, backward_time=0.453, grad_norm=44.198, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.987e-04, 
train_time=1.956 +[gpub058:0/16] 2024-02-04 08:39:40,073 (trainer:737) INFO: 26epoch:train:4901-5000batch: iter_time=8.310e-05, forward_time=0.293, loss_ctc=53.583, loss_att=55.226, acc=0.741, loss=54.733, backward_time=0.411, grad_norm=39.088, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.987e-04, train_time=1.355 +[gpub058:0/16] 2024-02-04 08:40:00,187 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub058:0/16] 2024-02-04 08:40:19,704 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 08:40:23,365 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 08:40:23,366 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-04 08:40:23,369 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 08:47:47,099 (trainer:737) INFO: 26epoch:train:5001-5100batch: iter_time=3.360, forward_time=0.395, loss_ctc=41.662, loss_att=39.157, acc=0.762, loss=39.908, backward_time=0.414, grad_norm=33.193, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.987e-04, train_time=4.870 +[gpub058:0/16] 2024-02-04 08:50:24,480 (trainer:737) INFO: 26epoch:train:5101-5200batch: iter_time=8.135e-05, forward_time=0.289, loss_ctc=45.716, loss_att=44.288, acc=0.737, loss=44.716, backward_time=0.401, grad_norm=38.200, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.093, optim0_lr0=1.987e-04, train_time=1.574 +[gpub058:0/16] 2024-02-04 08:53:22,222 (trainer:737) INFO: 26epoch:train:5201-5300batch: iter_time=0.108, forward_time=0.379, loss_ctc=43.861, loss_att=40.525, acc=0.759, loss=41.526, backward_time=0.466, grad_norm=34.392, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.986e-04, train_time=1.777 +[gpub058:0/16] 2024-02-04 08:58:00,277 (trainer:737) INFO: 26epoch:train:5301-5400batch: iter_time=0.045, forward_time=0.290, loss_ctc=47.232, loss_att=54.736, acc=0.739, loss=52.485, backward_time=0.401, grad_norm=37.885, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.986e-04, train_time=2.780 +[gpub058:0/16] 2024-02-04 09:00:19,063 (trainer:737) INFO: 26epoch:train:5401-5500batch: iter_time=7.852e-05, forward_time=0.289, loss_ctc=47.165, loss_att=42.506, acc=0.756, loss=43.904, backward_time=0.403, grad_norm=34.238, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.986e-04, train_time=1.388 +[gpub058:0/16] 2024-02-04 09:05:39,622 (trainer:737) INFO: 26epoch:train:5501-5600batch: iter_time=8.935e-04, forward_time=0.423, loss_ctc=50.669, loss_att=43.387, acc=0.766, loss=45.572, backward_time=0.482, grad_norm=38.338, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.986e-04, train_time=3.205 +[gpub058:0/16] 2024-02-04 09:08:09,632 (trainer:737) INFO: 26epoch:train:5601-5700batch: iter_time=8.241e-05, forward_time=0.292, loss_ctc=47.834, loss_att=53.221, acc=0.731, loss=51.605, backward_time=0.404, grad_norm=38.049, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.985e-04, train_time=1.500 +[gpub058:0/16] 2024-02-04 09:11:18,999 (trainer:737) INFO: 26epoch:train:5701-5800batch: iter_time=9.639e-05, forward_time=0.287, loss_ctc=44.208, loss_att=44.811, acc=0.734, loss=44.630, backward_time=0.400, grad_norm=36.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.985e-04, 
train_time=1.893 +[gpub058:0/16] 2024-02-04 09:15:20,005 (trainer:737) INFO: 26epoch:train:5801-5900batch: iter_time=8.340e-05, forward_time=0.499, loss_ctc=46.103, loss_att=41.024, acc=0.754, loss=42.547, backward_time=0.452, grad_norm=40.395, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.985e-04, train_time=2.409 +[gpub058:0/16] 2024-02-04 09:18:24,680 (trainer:737) INFO: 26epoch:train:5901-6000batch: iter_time=8.895e-05, forward_time=0.295, loss_ctc=55.294, loss_att=57.743, acc=0.729, loss=57.009, backward_time=0.405, grad_norm=43.373, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.984e-04, train_time=1.848 +[gpub058:0/16] 2024-02-04 09:21:14,178 (trainer:737) INFO: 26epoch:train:6001-6100batch: iter_time=7.153e-04, forward_time=0.305, loss_ctc=50.067, loss_att=51.151, acc=0.728, loss=50.826, backward_time=0.458, grad_norm=45.001, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.984e-04, train_time=1.694 +[gpub058:0/16] 2024-02-04 09:23:51,222 (trainer:737) INFO: 26epoch:train:6101-6200batch: iter_time=8.656e-05, forward_time=0.384, loss_ctc=59.535, loss_att=62.192, acc=0.715, loss=61.395, backward_time=0.417, grad_norm=41.198, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.984e-04, train_time=1.571 +[gpub058:0/16] 2024-02-04 09:25:16,473 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub058:0/16] 2024-02-04 09:25:35,751 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 09:25:39,336 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 09:25:39,336 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub058:0/16] 2024-02-04 09:25:39,339 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 09:32:02,690 (trainer:737) INFO: 26epoch:train:6201-6300batch: iter_time=3.427, forward_time=0.289, loss_ctc=40.603, loss_att=44.265, acc=0.752, loss=43.167, backward_time=0.402, grad_norm=33.397, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.984e-04, train_time=4.915 +[gpub058:0/16] 2024-02-04 09:34:44,931 (trainer:737) INFO: 26epoch:train:6301-6400batch: iter_time=8.789e-05, forward_time=0.422, loss_ctc=45.489, loss_att=44.552, acc=0.753, loss=44.833, backward_time=0.426, grad_norm=35.236, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.983e-04, train_time=1.622 +[gpub058:0/16] 2024-02-04 09:37:28,931 (trainer:737) INFO: 26epoch:train:6401-6500batch: iter_time=8.031e-05, forward_time=0.290, loss_ctc=43.028, loss_att=41.779, acc=0.753, loss=42.154, backward_time=0.404, grad_norm=35.064, clip=100.000, loss_scale=6.594e+33, optim_step_time=0.092, optim0_lr0=1.983e-04, train_time=1.640 +[gpub058:0/16] 2024-02-04 09:40:00,185 (trainer:737) 
INFO: 26epoch:train:6501-6600batch: iter_time=7.913e-05, forward_time=0.291, loss_ctc=46.363, loss_att=44.379, acc=0.765, loss=44.974, backward_time=0.404, grad_norm=36.956, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.983e-04, train_time=1.513 +[gpub058:0/16] 2024-02-04 09:42:37,561 (trainer:737) INFO: 26epoch:train:6601-6700batch: iter_time=8.432e-05, forward_time=0.377, loss_ctc=45.380, loss_att=50.399, acc=0.754, loss=48.893, backward_time=0.454, grad_norm=37.432, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=1.983e-04, train_time=1.572 +[gpub058:0/16] 2024-02-04 09:45:26,547 (trainer:737) INFO: 26epoch:train:6701-6800batch: iter_time=8.191e-05, forward_time=0.290, loss_ctc=51.512, loss_att=46.483, acc=0.755, loss=47.992, backward_time=0.403, grad_norm=40.172, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.982e-04, train_time=1.691 +[gpub058:0/16] 2024-02-04 09:48:07,509 (trainer:737) INFO: 26epoch:train:6801-6900batch: iter_time=8.404e-05, forward_time=0.323, loss_ctc=48.365, loss_att=47.655, acc=0.755, loss=47.868, backward_time=0.407, grad_norm=37.171, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.982e-04, train_time=1.610 +[gpub058:0/16] 2024-02-04 09:50:35,857 (trainer:737) INFO: 26epoch:train:6901-7000batch: iter_time=8.153e-05, forward_time=0.420, loss_ctc=43.967, loss_att=48.246, acc=0.755, loss=46.963, backward_time=0.438, grad_norm=33.767, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.982e-04, train_time=1.482 +[gpub058:0/16] 2024-02-04 09:52:50,481 (trainer:737) INFO: 26epoch:train:7001-7100batch: iter_time=7.645e-05, forward_time=0.291, loss_ctc=44.538, loss_att=44.972, acc=0.747, loss=44.842, backward_time=0.404, grad_norm=40.764, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.982e-04, train_time=1.347 +[gpub058:0/16] 2024-02-04 09:56:06,665 (trainer:737) INFO: 26epoch:train:7101-7200batch: 
iter_time=8.113e-05, forward_time=0.425, loss_ctc=50.402, loss_att=48.836, acc=0.747, loss=49.306, backward_time=0.437, grad_norm=43.191, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.981e-04, train_time=1.962 +[gpub058:0/16] 2024-02-04 09:56:47,480 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-04 09:58:25,895 (trainer:737) INFO: 26epoch:train:7201-7300batch: iter_time=9.247e-05, forward_time=0.290, loss_ctc=48.127, loss_att=48.715, acc=0.745, loss=48.539, backward_time=0.405, grad_norm=42.169, clip=100.000, loss_scale=6.608e+33, optim_step_time=0.092, optim0_lr0=1.981e-04, train_time=1.391 +[gpub058:0/16] 2024-02-04 10:01:10,036 (trainer:737) INFO: 26epoch:train:7301-7400batch: iter_time=8.514e-05, forward_time=0.359, loss_ctc=56.387, loss_att=66.384, acc=0.712, loss=63.385, backward_time=0.454, grad_norm=47.905, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.981e-04, train_time=1.642 +[gpub058:0/16] 2024-02-04 10:04:06,350 (trainer:737) INFO: 26epoch:train:7401-7500batch: iter_time=8.808e-05, forward_time=0.327, loss_ctc=53.493, loss_att=55.064, acc=0.741, loss=54.593, backward_time=0.414, grad_norm=36.975, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.981e-04, train_time=1.763 +[gpub058:0/16] 2024-02-04 10:04:26,378 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub058:0/16] 2024-02-04 10:04:45,481 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 10:04:49,088 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 10:04:49,088 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub058:0/16] 2024-02-04 10:04:49,091 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 10:12:29,665 (trainer:737) INFO: 26epoch:train:7501-7600batch: iter_time=3.549, forward_time=0.288, loss_ctc=41.426, loss_att=38.479, acc=0.767, loss=39.363, backward_time=0.401, grad_norm=32.340, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.980e-04, train_time=5.031 +[gpub058:0/16] 2024-02-04 10:15:29,643 (trainer:737) INFO: 26epoch:train:7601-7700batch: iter_time=8.464e-05, forward_time=0.330, loss_ctc=45.081, loss_att=43.168, acc=0.749, loss=43.742, backward_time=0.430, grad_norm=38.288, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.980e-04, train_time=1.801 +[gpub058:0/16] 2024-02-04 10:18:05,204 (trainer:737) INFO: 26epoch:train:7701-7800batch: iter_time=9.493e-05, forward_time=0.339, loss_ctc=43.292, loss_att=40.140, acc=0.769, loss=41.086, backward_time=0.413, grad_norm=32.840, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.980e-04, train_time=1.554 +[gpub058:0/16] 2024-02-04 10:20:52,014 (trainer:737) INFO: 
26epoch:train:7801-7900batch: iter_time=8.890e-05, forward_time=0.291, loss_ctc=46.753, loss_att=53.505, acc=0.745, loss=51.479, backward_time=0.405, grad_norm=36.634, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.980e-04, train_time=1.668 +[gpub058:0/16] 2024-02-04 10:24:07,836 (trainer:737) INFO: 26epoch:train:7901-8000batch: iter_time=9.619e-05, forward_time=0.397, loss_ctc=46.688, loss_att=42.180, acc=0.771, loss=43.532, backward_time=0.442, grad_norm=35.494, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.979e-04, train_time=1.959 +[gpub058:0/16] 2024-02-04 10:26:30,602 (trainer:737) INFO: 26epoch:train:8001-8100batch: iter_time=8.859e-05, forward_time=0.307, loss_ctc=50.507, loss_att=45.156, acc=0.766, loss=46.761, backward_time=0.405, grad_norm=38.860, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.979e-04, train_time=1.428 +[gpub058:0/16] 2024-02-04 10:29:34,091 (trainer:737) INFO: 26epoch:train:8101-8200batch: iter_time=1.021e-04, forward_time=0.410, loss_ctc=47.701, loss_att=53.364, acc=0.737, loss=51.665, backward_time=0.432, grad_norm=41.491, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.979e-04, train_time=1.834 +[gpub058:0/16] 2024-02-04 10:32:23,109 (trainer:737) INFO: 26epoch:train:8201-8300batch: iter_time=1.046e-04, forward_time=0.290, loss_ctc=44.800, loss_att=46.734, acc=0.743, loss=46.154, backward_time=0.401, grad_norm=38.172, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.979e-04, train_time=1.691 +[gpub058:0/16] 2024-02-04 10:34:58,368 (trainer:737) INFO: 26epoch:train:8301-8400batch: iter_time=9.801e-05, forward_time=0.289, loss_ctc=46.347, loss_att=41.325, acc=0.762, loss=42.832, backward_time=0.401, grad_norm=40.900, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.978e-04, train_time=1.552 +[gpub058:0/16] 2024-02-04 10:38:04,464 (trainer:737) INFO: 26epoch:train:8401-8500batch: 
iter_time=4.707e-04, forward_time=0.371, loss_ctc=54.995, loss_att=57.724, acc=0.744, loss=56.905, backward_time=0.478, grad_norm=43.011, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.978e-04, train_time=1.859 +[gpub058:0/16] 2024-02-04 10:40:34,215 (trainer:737) INFO: 26epoch:train:8501-8600batch: iter_time=9.592e-05, forward_time=0.294, loss_ctc=49.871, loss_att=52.502, acc=0.736, loss=51.713, backward_time=0.405, grad_norm=47.030, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.978e-04, train_time=1.498 +[gpub058:0/16] 2024-02-04 10:43:36,360 (trainer:737) INFO: 26epoch:train:8601-8700batch: iter_time=9.454e-05, forward_time=0.293, loss_ctc=59.530, loss_att=63.268, acc=0.720, loss=62.147, backward_time=0.407, grad_norm=40.246, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.978e-04, train_time=1.821 +[gpub058:0/16] 2024-02-04 10:45:17,736 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub058:0/16] 2024-02-04 10:45:37,372 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 10:45:40,986 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 10:45:40,986 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub058:0/16] 2024-02-04 10:45:40,989 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 10:51:44,028 
(trainer:737) INFO: 26epoch:train:8701-8800batch: iter_time=3.319, forward_time=0.355, loss_ctc=40.097, loss_att=43.190, acc=0.757, loss=42.262, backward_time=0.412, grad_norm=34.619, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.977e-04, train_time=4.876 +[gpub058:0/16] 2024-02-04 10:54:21,941 (trainer:737) INFO: 26epoch:train:8801-8900batch: iter_time=8.397e-05, forward_time=0.289, loss_ctc=44.782, loss_att=43.107, acc=0.754, loss=43.610, backward_time=0.402, grad_norm=35.731, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.977e-04, train_time=1.579 +[gpub058:0/16] 2024-02-04 10:57:08,582 (trainer:737) INFO: 26epoch:train:8901-9000batch: iter_time=8.523e-05, forward_time=0.377, loss_ctc=42.733, loss_att=41.640, acc=0.748, loss=41.968, backward_time=0.426, grad_norm=35.110, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.977e-04, train_time=1.666 +[gpub058:0/16] 2024-02-04 10:59:37,896 (trainer:737) INFO: 26epoch:train:9001-9100batch: iter_time=8.323e-05, forward_time=0.290, loss_ctc=45.826, loss_att=44.141, acc=0.760, loss=44.646, backward_time=0.403, grad_norm=35.175, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.976e-04, train_time=1.493 +[gpub058:0/16] 2024-02-04 11:02:45,577 (trainer:737) INFO: 26epoch:train:9101-9200batch: iter_time=8.890e-05, forward_time=0.438, loss_ctc=44.924, loss_att=50.647, acc=0.746, loss=48.930, backward_time=0.418, grad_norm=36.608, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.976e-04, train_time=1.877 +[gpub058:0/16] 2024-02-04 11:05:00,065 (trainer:737) INFO: 26epoch:train:9201-9300batch: iter_time=7.732e-05, forward_time=0.289, loss_ctc=51.000, loss_att=44.685, acc=0.756, loss=46.579, backward_time=0.404, grad_norm=40.408, clip=100.000, loss_scale=8.931e+33, optim_step_time=0.093, optim0_lr0=1.976e-04, train_time=1.345 +[gpub058:0/16] 2024-02-04 11:08:13,806 (trainer:737) INFO: 
26epoch:train:9301-9400batch: iter_time=0.002, forward_time=0.435, loss_ctc=48.071, loss_att=46.718, acc=0.756, loss=47.124, backward_time=0.437, grad_norm=37.340, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.116, optim0_lr0=1.976e-04, train_time=1.937 +[gpub058:0/16] 2024-02-04 11:10:32,577 (trainer:737) INFO: 26epoch:train:9401-9500batch: iter_time=8.478e-05, forward_time=0.290, loss_ctc=43.878, loss_att=47.981, acc=0.742, loss=46.750, backward_time=0.403, grad_norm=34.942, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.975e-04, train_time=1.388 +[gpub058:0/16] 2024-02-04 11:13:45,483 (trainer:737) INFO: 26epoch:train:9501-9600batch: iter_time=7.010e-04, forward_time=0.403, loss_ctc=44.700, loss_att=43.871, acc=0.738, loss=44.119, backward_time=0.436, grad_norm=38.774, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.115, optim0_lr0=1.975e-04, train_time=1.928 +[gpub058:0/16] 2024-02-04 11:16:16,412 (trainer:737) INFO: 26epoch:train:9601-9700batch: iter_time=8.279e-05, forward_time=0.291, loss_ctc=49.934, loss_att=48.650, acc=0.744, loss=49.035, backward_time=0.403, grad_norm=40.204, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.975e-04, train_time=1.509 +[gpub058:0/16] 2024-02-04 11:19:15,141 (trainer:737) INFO: 26epoch:train:9701-9800batch: iter_time=3.836e-04, forward_time=0.401, loss_ctc=47.636, loss_att=47.284, acc=0.735, loss=47.390, backward_time=0.429, grad_norm=42.165, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.975e-04, train_time=1.787 +[gpub058:0/16] 2024-02-04 11:21:54,197 (trainer:737) INFO: 26epoch:train:9801-9900batch: iter_time=8.589e-05, forward_time=0.292, loss_ctc=55.594, loss_att=63.199, acc=0.715, loss=60.918, backward_time=0.408, grad_norm=43.606, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.974e-04, train_time=1.589 +[gpub058:0/16] 2024-02-04 11:25:10,745 (trainer:737) INFO: 26epoch:train:9901-10000batch: 
iter_time=5.404e-04, forward_time=0.408, loss_ctc=52.535, loss_att=55.170, acc=0.731, loss=54.380, backward_time=0.502, grad_norm=37.883, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=1.974e-04, train_time=1.967 +[gpub058:0/16] 2024-02-04 11:25:30,909 (multiple_iter_factory:32) INFO: Building 8th iter-factory... + +gpub071:440843:440893 [2] proxy.cc:1059 NCCL WARN [Proxy Service] Poll failed with error 1 +[gpub058:0/16] 2024-02-04 11:25:50,166 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 11:25:53,808 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 11:25:53,808 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub058:0/16] 2024-02-04 11:25:53,811 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 11:33:56,421 (trainer:737) INFO: 26epoch:train:10001-10100batch: iter_time=3.610, forward_time=0.287, loss_ctc=41.068, loss_att=39.059, acc=0.767, loss=39.662, backward_time=0.400, grad_norm=33.398, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.974e-04, train_time=5.257 +[gpub058:0/16] 2024-02-04 11:37:52,630 (trainer:737) INFO: 26epoch:train:10101-10200batch: iter_time=8.846e-05, forward_time=0.438, loss_ctc=45.272, loss_att=44.298, acc=0.745, loss=44.591, backward_time=0.422, grad_norm=38.510, clip=100.000, loss_scale=1.038e+34, 
optim_step_time=0.110, optim0_lr0=1.974e-04, train_time=2.362 +[gpub058:0/16] 2024-02-04 11:40:47,786 (trainer:737) INFO: 26epoch:train:10201-10300batch: iter_time=8.720e-05, forward_time=0.288, loss_ctc=43.414, loss_att=40.328, acc=0.769, loss=41.253, backward_time=0.402, grad_norm=32.730, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.973e-04, train_time=1.746 +[gpub058:0/16] 2024-02-04 11:44:45,437 (trainer:737) INFO: 26epoch:train:10301-10400batch: iter_time=8.969e-05, forward_time=0.488, loss_ctc=46.786, loss_att=54.454, acc=0.744, loss=52.154, backward_time=0.433, grad_norm=37.583, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.973e-04, train_time=2.381 +[gpub058:0/16] 2024-02-04 11:46:23,252 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-04 11:48:04,037 (trainer:737) INFO: 26epoch:train:10401-10500batch: iter_time=9.001e-05, forward_time=0.287, loss_ctc=46.895, loss_att=42.134, acc=0.771, loss=43.562, backward_time=0.402, grad_norm=34.899, clip=100.000, loss_scale=7.972e+33, optim_step_time=0.092, optim0_lr0=1.973e-04, train_time=1.986 +[gpub058:0/16] 2024-02-04 11:50:50,498 (trainer:737) INFO: 26epoch:train:10501-10600batch: iter_time=8.496e-05, forward_time=0.289, loss_ctc=51.626, loss_att=45.361, acc=0.766, loss=47.241, backward_time=0.404, grad_norm=39.867, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.973e-04, train_time=1.664 +[gpub058:0/16] 2024-02-04 11:54:09,194 (trainer:737) INFO: 26epoch:train:10601-10700batch: iter_time=9.287e-05, forward_time=0.443, loss_ctc=47.440, loss_att=53.727, acc=0.739, loss=51.841, backward_time=0.428, grad_norm=39.866, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.972e-04, train_time=1.986 +[gpub058:0/16] 2024-02-04 11:56:56,785 (trainer:737) INFO: 26epoch:train:10701-10800batch: iter_time=9.830e-05, forward_time=0.289, loss_ctc=43.641, loss_att=46.782, acc=0.743, 
loss=45.840, backward_time=0.402, grad_norm=38.524, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.972e-04, train_time=1.676 +[gpub058:0/16] 2024-02-04 11:59:37,437 (trainer:737) INFO: 26epoch:train:10801-10900batch: iter_time=9.746e-05, forward_time=0.292, loss_ctc=45.859, loss_att=40.728, acc=0.764, loss=42.268, backward_time=0.404, grad_norm=39.566, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.972e-04, train_time=1.605 +[gpub058:0/16] 2024-02-04 12:03:04,246 (trainer:737) INFO: 26epoch:train:10901-11000batch: iter_time=3.958e-04, forward_time=0.394, loss_ctc=54.530, loss_att=57.975, acc=0.743, loss=56.941, backward_time=0.440, grad_norm=45.973, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.972e-04, train_time=2.069 +[gpub058:0/16] 2024-02-04 12:05:36,891 (trainer:737) INFO: 26epoch:train:11001-11100batch: iter_time=9.953e-05, forward_time=0.292, loss_ctc=49.606, loss_att=51.812, acc=0.738, loss=51.150, backward_time=0.406, grad_norm=43.944, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.971e-04, train_time=1.527 +[gpub058:0/16] 2024-02-04 12:08:39,643 (trainer:737) INFO: 26epoch:train:11101-11200batch: iter_time=9.933e-05, forward_time=0.495, loss_ctc=58.973, loss_att=62.861, acc=0.719, loss=61.695, backward_time=0.428, grad_norm=39.780, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.971e-04, train_time=1.827 +[gpub058:0/16] 2024-02-04 12:10:09,778 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub058:0/16] 2024-02-04 12:10:28,967 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 12:10:32,621 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 12:10:32,621 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub058:0/16] 2024-02-04 12:10:32,625 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 12:17:51,865 (trainer:737) INFO: 26epoch:train:11201-11300batch: iter_time=3.692, forward_time=0.470, loss_ctc=39.697, loss_att=41.973, acc=0.764, loss=41.290, backward_time=0.428, grad_norm=31.870, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.971e-04, train_time=5.521 +[gpub058:0/16] 2024-02-04 12:21:08,509 (trainer:737) INFO: 26epoch:train:11301-11400batch: iter_time=8.223e-05, forward_time=0.349, loss_ctc=45.424, loss_att=43.273, acc=0.756, loss=43.919, backward_time=0.501, grad_norm=36.943, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.971e-04, train_time=1.967 +[gpub058:0/16] 2024-02-04 12:24:50,642 (trainer:737) INFO: 26epoch:train:11401-11500batch: iter_time=9.410e-05, forward_time=0.434, loss_ctc=42.690, loss_att=40.427, acc=0.760, loss=41.106, backward_time=0.525, grad_norm=108.573, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=1.970e-04, train_time=2.221 +[gpub058:0/16] 2024-02-04 12:27:33,049 (trainer:737) 
INFO: 26epoch:train:11501-11600batch: iter_time=9.396e-05, forward_time=0.448, loss_ctc=45.825, loss_att=43.639, acc=0.766, loss=44.295, backward_time=0.431, grad_norm=34.745, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.970e-04, train_time=1.623 +[gpub058:0/16] 2024-02-04 12:31:01,751 (trainer:737) INFO: 26epoch:train:11601-11700batch: iter_time=9.045e-05, forward_time=0.375, loss_ctc=44.946, loss_att=49.731, acc=0.757, loss=48.295, backward_time=0.443, grad_norm=34.089, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.970e-04, train_time=2.086 +[gpub058:0/16] 2024-02-04 12:33:52,734 (trainer:737) INFO: 26epoch:train:11701-11800batch: iter_time=4.614e-04, forward_time=0.453, loss_ctc=50.916, loss_att=46.514, acc=0.756, loss=47.835, backward_time=0.416, grad_norm=39.679, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.970e-04, train_time=1.710 +[gpub058:0/16] 2024-02-04 12:36:50,548 (trainer:737) INFO: 26epoch:train:11801-11900batch: iter_time=8.521e-05, forward_time=0.347, loss_ctc=47.503, loss_att=47.182, acc=0.757, loss=47.278, backward_time=0.406, grad_norm=37.602, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.969e-04, train_time=1.779 +[gpub058:0/16] 2024-02-04 12:40:14,931 (trainer:737) INFO: 26epoch:train:11901-12000batch: iter_time=0.002, forward_time=0.419, loss_ctc=43.798, loss_att=47.755, acc=0.756, loss=46.568, backward_time=0.445, grad_norm=33.749, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.969e-04, train_time=2.041 +[gpub058:0/16] 2024-02-04 12:43:28,644 (trainer:737) INFO: 26epoch:train:12001-12100batch: iter_time=9.048e-04, forward_time=0.475, loss_ctc=44.265, loss_att=44.139, acc=0.752, loss=44.177, backward_time=0.430, grad_norm=40.597, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.969e-04, train_time=1.939 +[gpub058:0/16] 2024-02-04 12:46:49,595 (trainer:737) INFO: 
26epoch:train:12101-12200batch: iter_time=3.534e-04, forward_time=0.462, loss_ctc=49.764, loss_att=47.549, acc=0.752, loss=48.213, backward_time=0.424, grad_norm=40.352, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.969e-04, train_time=2.009 +[gpub058:0/16] 2024-02-04 12:49:47,693 (trainer:737) INFO: 26epoch:train:12201-12300batch: iter_time=1.016e-04, forward_time=0.289, loss_ctc=48.076, loss_att=47.581, acc=0.751, loss=47.729, backward_time=0.408, grad_norm=39.102, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.968e-04, train_time=1.782 +[gpub058:0/16] 2024-02-04 12:53:00,929 (trainer:737) INFO: 26epoch:train:12301-12400batch: iter_time=0.002, forward_time=0.389, loss_ctc=55.062, loss_att=65.017, acc=0.716, loss=62.030, backward_time=0.488, grad_norm=45.061, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.119, optim0_lr0=1.968e-04, train_time=1.931 +[gpub058:0/16] 2024-02-04 12:56:08,930 (trainer:737) INFO: 26epoch:train:12401-12500batch: iter_time=3.487e-04, forward_time=0.424, loss_ctc=52.524, loss_att=54.308, acc=0.745, loss=53.773, backward_time=0.443, grad_norm=35.269, clip=100.000, loss_scale=7.581e+33, optim_step_time=0.104, optim0_lr0=1.968e-04, train_time=1.881 +[gpub058:0/16] 2024-02-04 12:56:28,981 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub058:0/16] 2024-02-04 12:56:48,777 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 12:56:52,456 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 12:56:52,456 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub058:0/16] 2024-02-04 12:56:52,459 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 13:05:03,917 (trainer:737) INFO: 26epoch:train:12501-12600batch: iter_time=3.418, forward_time=0.458, loss_ctc=41.206, loss_att=39.964, acc=0.764, loss=40.336, backward_time=0.429, grad_norm=31.844, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.968e-04, train_time=5.350 +[gpub058:0/16] 2024-02-04 13:07:52,742 (trainer:737) INFO: 26epoch:train:12601-12700batch: iter_time=8.748e-05, forward_time=0.289, loss_ctc=44.826, loss_att=44.246, acc=0.737, loss=44.420, backward_time=0.404, grad_norm=38.417, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.967e-04, train_time=1.688 +[gpub058:0/16] 2024-02-04 13:10:20,652 (trainer:737) INFO: 26epoch:train:12701-12800batch: iter_time=8.733e-05, forward_time=0.317, loss_ctc=43.501, loss_att=40.957, acc=0.761, loss=41.720, backward_time=0.402, grad_norm=33.293, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.967e-04, train_time=1.479 +[gpub058:0/16] 2024-02-04 13:13:30,706 (trainer:737) 
INFO: 26epoch:train:12801-12900batch: iter_time=8.504e-05, forward_time=0.430, loss_ctc=46.835, loss_att=53.675, acc=0.743, loss=51.623, backward_time=0.472, grad_norm=37.730, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=1.967e-04, train_time=1.900 +[gpub058:0/16] 2024-02-04 13:15:54,150 (trainer:737) INFO: 26epoch:train:12901-13000batch: iter_time=9.035e-05, forward_time=0.289, loss_ctc=45.642, loss_att=41.857, acc=0.761, loss=42.992, backward_time=0.402, grad_norm=34.741, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.967e-04, train_time=1.434 +[gpub058:0/16] 2024-02-04 13:19:00,010 (trainer:737) INFO: 26epoch:train:13001-13100batch: iter_time=2.798e-04, forward_time=0.309, loss_ctc=49.799, loss_att=43.380, acc=0.765, loss=45.305, backward_time=0.409, grad_norm=43.473, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.966e-04, train_time=1.858 +[gpub058:0/16] 2024-02-04 13:22:10,004 (trainer:737) INFO: 26epoch:train:13101-13200batch: iter_time=8.700e-05, forward_time=0.376, loss_ctc=46.515, loss_att=53.829, acc=0.730, loss=51.635, backward_time=0.453, grad_norm=38.416, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.966e-04, train_time=1.900 +[gpub058:0/16] 2024-02-04 13:24:41,152 (trainer:737) INFO: 26epoch:train:13201-13300batch: iter_time=9.233e-05, forward_time=0.287, loss_ctc=43.961, loss_att=45.055, acc=0.735, loss=44.727, backward_time=0.399, grad_norm=35.874, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.966e-04, train_time=1.511 +[gpub058:0/16] 2024-02-04 13:27:55,078 (trainer:737) INFO: 26epoch:train:13301-13400batch: iter_time=9.250e-05, forward_time=0.355, loss_ctc=45.674, loss_att=40.447, acc=0.758, loss=42.015, backward_time=0.409, grad_norm=38.940, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.966e-04, train_time=1.938 +[gpub058:0/16] 2024-02-04 13:31:05,068 (trainer:737) INFO: 
26epoch:train:13401-13500batch: iter_time=9.323e-05, forward_time=0.395, loss_ctc=54.087, loss_att=56.797, acc=0.731, loss=55.984, backward_time=0.430, grad_norm=41.565, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.965e-04, train_time=1.900 +[gpub058:0/16] 2024-02-04 13:33:59,500 (trainer:737) INFO: 26epoch:train:13501-13600batch: iter_time=9.900e-05, forward_time=0.290, loss_ctc=48.855, loss_att=50.531, acc=0.730, loss=50.028, backward_time=0.403, grad_norm=45.967, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.965e-04, train_time=1.744 +[gpub058:0/16] 2024-02-04 13:36:57,829 (trainer:737) INFO: 26epoch:train:13601-13700batch: iter_time=9.453e-05, forward_time=0.414, loss_ctc=58.355, loss_att=61.635, acc=0.719, loss=60.651, backward_time=0.448, grad_norm=38.348, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.965e-04, train_time=1.783 +[gpub058:0/16] 2024-02-04 13:38:29,023 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub058:0/16] 2024-02-04 13:38:48,150 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 13:38:51,808 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 13:38:51,808 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub058:0/16] 2024-02-04 13:38:51,811 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 13:45:38,362 (trainer:737) INFO: 26epoch:train:13701-13800batch: iter_time=3.455, forward_time=0.314, loss_ctc=39.669, loss_att=43.416, acc=0.756, loss=42.292, backward_time=0.416, grad_norm=34.396, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.964e-04, train_time=5.205 +[gpub058:0/16] 2024-02-04 13:48:23,639 (trainer:737) INFO: 26epoch:train:13801-13900batch: iter_time=8.475e-05, forward_time=0.430, loss_ctc=44.566, loss_att=43.738, acc=0.755, loss=43.986, backward_time=0.463, grad_norm=35.413, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.964e-04, train_time=1.652 +[gpub058:0/16] 2024-02-04 13:51:03,322 (trainer:737) INFO: 26epoch:train:13901-14000batch: iter_time=8.321e-05, forward_time=0.390, loss_ctc=42.465, loss_att=40.525, acc=0.758, loss=41.107, backward_time=0.416, grad_norm=34.252, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.964e-04, train_time=1.597 +[gpub058:0/16] 2024-02-04 13:54:04,857 (trainer:737) 
INFO: 26epoch:train:14001-14100batch: iter_time=9.713e-05, forward_time=0.349, loss_ctc=46.165, loss_att=43.796, acc=0.766, loss=44.507, backward_time=0.412, grad_norm=36.419, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.964e-04, train_time=1.815 +[gpub058:0/16] 2024-02-04 13:56:44,186 (trainer:737) INFO: 26epoch:train:14101-14200batch: iter_time=8.735e-05, forward_time=0.464, loss_ctc=45.257, loss_att=50.531, acc=0.755, loss=48.949, backward_time=0.440, grad_norm=37.412, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.963e-04, train_time=1.593 +[gpub058:0/16] 2024-02-04 13:59:39,066 (trainer:737) INFO: 26epoch:train:14201-14300batch: iter_time=9.620e-05, forward_time=0.294, loss_ctc=51.236, loss_att=45.839, acc=0.758, loss=47.458, backward_time=0.406, grad_norm=40.844, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.963e-04, train_time=1.749 +[gpub058:0/16] 2024-02-04 14:02:26,156 (trainer:737) INFO: 26epoch:train:14301-14400batch: iter_time=9.146e-04, forward_time=0.417, loss_ctc=47.442, loss_att=46.923, acc=0.758, loss=47.078, backward_time=0.446, grad_norm=37.061, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.963e-04, train_time=1.669 +[gpub058:0/16] 2024-02-04 14:05:57,279 (trainer:737) INFO: 26epoch:train:14401-14500batch: iter_time=9.090e-04, forward_time=0.499, loss_ctc=43.850, loss_att=47.333, acc=0.757, loss=46.288, backward_time=0.497, grad_norm=35.169, clip=100.000, loss_scale=1.516e+34, optim_step_time=0.113, optim0_lr0=1.963e-04, train_time=2.112 +[gpub058:0/16] 2024-02-04 14:08:09,424 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-04 14:08:44,161 (trainer:737) INFO: 26epoch:train:14501-14600batch: iter_time=8.479e-05, forward_time=0.432, loss_ctc=44.176, loss_att=44.531, acc=0.751, loss=44.425, backward_time=0.430, grad_norm=38.504, clip=100.000, loss_scale=1.951e+34, optim_step_time=0.095, optim0_lr0=1.962e-04, train_time=1.668 +[gpub058:0/16] 2024-02-04 14:11:59,611 (trainer:737) INFO: 26epoch:train:14601-14700batch: iter_time=3.506e-04, forward_time=0.443, loss_ctc=49.891, loss_att=48.669, acc=0.749, loss=49.036, backward_time=0.433, grad_norm=43.798, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.962e-04, train_time=1.955 +[gpub058:0/16] 2024-02-04 14:12:40,061 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-04 14:14:38,321 (trainer:737) INFO: 26epoch:train:14701-14800batch: iter_time=9.822e-05, forward_time=0.289, loss_ctc=47.346, loss_att=48.141, acc=0.749, loss=47.902, backward_time=0.402, grad_norm=38.768, clip=100.000, loss_scale=6.399e+33, optim_step_time=0.092, optim0_lr0=1.962e-04, train_time=1.588 +[gpub058:0/16] 2024-02-04 14:17:45,381 (trainer:737) INFO: 26epoch:train:14801-14900batch: iter_time=7.922e-04, forward_time=0.474, loss_ctc=55.140, loss_att=65.623, acc=0.715, loss=62.478, backward_time=0.428, grad_norm=43.764, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.962e-04, train_time=1.869 +[gpub058:0/16] 2024-02-04 14:21:06,550 (trainer:737) INFO: 26epoch:train:14901-15000batch: iter_time=3.854e-04, forward_time=0.478, loss_ctc=51.927, loss_att=54.100, acc=0.744, loss=53.448, backward_time=0.422, grad_norm=40.179, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.961e-04, train_time=2.011 +[gpub058:0/16] 2024-02-04 15:00:16,689 (trainer:343) INFO: 26epoch results: [train] iter_time=0.273, forward_time=0.350, loss_ctc=48.034, loss_att=48.133, acc=0.746, loss=48.103, backward_time=0.422, grad_norm=38.932, clip=100.000, 
loss_scale=6.550e+33, optim_step_time=0.096, optim0_lr0=1.981e-04, train_time=1.986, time=8 hours, 17 minutes and 4.67 seconds, total_count=420000, gpu_max_cached_mem_GB=43.281, [valid] loss_ctc=37.613, cer_ctc=0.192, loss_att=39.718, acc=0.669, cer=0.324, wer=0.996, loss=39.086, time=38 minutes and 46.09 seconds, total_count=130788, gpu_max_cached_mem_GB=43.281 +[gpub058:0/16] 2024-02-04 15:00:27,267 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub058:0/16] 2024-02-04 15:00:27,338 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/18epoch.pth, exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/21epoch.pth +[gpub058:0/16] 2024-02-04 15:00:27,338 (trainer:272) INFO: 27/45epoch started. Estimated time to finish: 6 days, 17 hours and 20 minutes +[gpub058:0/16] 2024-02-04 15:00:27,347 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub058:0/16] 2024-02-04 15:00:45,587 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 15:00:49,047 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 15:00:49,047 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-04 15:00:49,050 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 15:07:51,988 (trainer:737) INFO: 27epoch:train:1-100batch: iter_time=2.917, forward_time=0.385, loss_ctc=54.767, loss_att=57.365, acc=0.711, loss=56.586, backward_time=0.414, grad_norm=43.706, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.961e-04, train_time=4.446 +[gpub058:0/16] 2024-02-04 15:10:32,064 (trainer:737) INFO: 27epoch:train:101-200batch: iter_time=8.308e-05, forward_time=0.292, loss_ctc=51.994, loss_att=60.561, acc=0.727, loss=57.991, backward_time=0.406, grad_norm=42.761, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.961e-04, train_time=1.600 +[gpub058:0/16] 2024-02-04 15:13:03,816 (trainer:737) INFO: 27epoch:train:201-300batch: iter_time=7.986e-05, forward_time=0.372, loss_ctc=49.813, loss_att=44.933, acc=0.762, loss=46.397, backward_time=0.454, grad_norm=37.857, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.961e-04, train_time=1.518 +[gpub058:0/16] 2024-02-04 15:16:00,077 (trainer:737) INFO: 
27epoch:train:301-400batch: iter_time=9.039e-05, forward_time=0.290, loss_ctc=47.558, loss_att=43.511, acc=0.775, loss=44.725, backward_time=0.402, grad_norm=36.157, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.960e-04, train_time=1.762 +[gpub058:0/16] 2024-02-04 15:18:52,534 (trainer:737) INFO: 27epoch:train:401-500batch: iter_time=8.665e-05, forward_time=0.376, loss_ctc=46.229, loss_att=43.130, acc=0.761, loss=44.060, backward_time=0.434, grad_norm=35.222, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.960e-04, train_time=1.724 +[gpub058:0/16] 2024-02-04 15:21:27,591 (trainer:737) INFO: 27epoch:train:501-600batch: iter_time=9.145e-05, forward_time=0.329, loss_ctc=42.137, loss_att=36.115, acc=0.761, loss=37.921, backward_time=0.421, grad_norm=33.959, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.960e-04, train_time=1.551 +[gpub058:0/16] 2024-02-04 15:24:05,110 (trainer:737) INFO: 27epoch:train:601-700batch: iter_time=8.252e-05, forward_time=0.343, loss_ctc=48.946, loss_att=40.719, acc=0.751, loss=43.187, backward_time=0.428, grad_norm=38.305, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.960e-04, train_time=1.575 +[gpub058:0/16] 2024-02-04 15:26:41,817 (trainer:737) INFO: 27epoch:train:701-800batch: iter_time=9.203e-05, forward_time=0.311, loss_ctc=50.154, loss_att=49.336, acc=0.744, loss=49.582, backward_time=0.407, grad_norm=38.778, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.959e-04, train_time=1.566 +[gpub058:0/16] 2024-02-04 15:29:23,788 (trainer:737) INFO: 27epoch:train:801-900batch: iter_time=1.968e-04, forward_time=0.306, loss_ctc=40.491, loss_att=42.932, acc=0.749, loss=42.200, backward_time=0.444, grad_norm=35.619, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.959e-04, train_time=1.620 +[gpub058:0/16] 2024-02-04 15:32:05,491 (trainer:737) INFO: 27epoch:train:901-1000batch: iter_time=8.553e-05, 
forward_time=0.335, loss_ctc=51.601, loss_att=50.203, acc=0.721, loss=50.622, backward_time=0.425, grad_norm=42.651, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.959e-04, train_time=1.617 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2932506.0 ON gpub058 CANCELLED AT 2024-02-04T15:34:22 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.11.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.11.log new file mode 100644 index 0000000000000000000000000000000000000000..9a1c8649c357c02f91dede400feeb1dc9f569fe2 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.11.log @@ -0,0 +1,2281 @@ +# Running on gpub022.delta.ncsa.illinois.edu +# Started at Thu Feb 1 10:53:31 CST 2024 +# SLURMD_NODENAME=gpub022 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2913929 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1706979193 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2913929 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[022,058,062,085]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1706806393 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[022,058,062,085]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# 
SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=1741645 +# SLURM_TOPOLOGY_ADDR=ss00.ss10.gpub022 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe 
--valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_faee9aed-7ec4-484d-9dae-05f165fc470e +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type 
dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner 
none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_faee9aed-7ec4-484d-9dae-05f165fc470e +_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessi_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_faee9aed-7ec4-484d-9dae-05f165fc470e +ng_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_faee9aed-7ec4-484d-9dae-05f165fc470e +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_faee9aed-7ec4-484d-9dae-05f165fc470e +[gpub022:0/16] 2024-02-01 10:57:21,085 (distributed_c10d:319) INFO: Added key: 
store_based_barrier_key:1 to store for rank: 0 +[gpub022:0/16] 2024-02-01 10:57:21,202 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. +[gpub022:0/16] 2024-02-01 10:57:21,264 (s2t:464) INFO: Vocabulary size: 50002 +[gpub022:0/16] 2024-02-01 10:57:30,965 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub022:0/16] 2024-02-01 10:57:30,971 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): 
ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + 
(channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): 
Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): 
Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, 
out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, 
bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): 
GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + 
) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + 
(linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, 
out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): 
Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): 
Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub022:0/16] 2024-02-01 10:57:30,971 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub022:0/16] 2024-02-01 10:57:30,971 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], 
warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub022:0/16] 2024-02-01 10:57:30,985 (abs_task:1245) INFO: Saving the configuration in exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub022:0/16] 2024-02-01 10:57:36,611 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 10:57:37,536 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 10:57:37,536 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub022:0/16] 2024-02-01 10:57:37,537 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 10:57:49,866 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub022:1741720:1741720 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1741720:1741720 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1741720:1741720 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub022:0/16] 2024-02-01 10:57:55,128 (trainer:284) INFO: 18/45epoch started +[gpub022:0/16] 2024-02-01 10:57:55,173 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub022:0/16] 2024-02-01 10:58:13,046 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 10:58:16,468 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 10:58:16,468 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub022:0/16] 2024-02-01 10:58:16,471 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub062:1370885:1370885 [2] NCCL INFO cudaDriverVersion 12020 +gpub062:1370885:1370885 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1370885:1370885 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:1370885:1370939 [2] NCCL INFO NET/IB : No device found. 
+gpub062:1370885:1370939 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1370885:1370939 [2] NCCL INFO Using network Socket +gpub062:1370885:1370939 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub062:1370885:1370939 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub062:1370885:1370939 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub062:1370885:1370939 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub062:1370885:1370939 [2] NCCL INFO Connected all rings +gpub062:1370885:1370939 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub062:1370885:1370939 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub062:1370885:1370939 [2] NCCL INFO Connected all trees +gpub062:1370885:1370939 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1370885:1370939 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1370885:1370939 [2] NCCL INFO comm 0x1ac32740 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub085:2091268:2091268 [1] NCCL INFO cudaDriverVersion 12020 +gpub085:2091268:2091268 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:2091268:2091268 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:2091268:2091322 [1] NCCL INFO NET/IB : No device found. 
+gpub085:2091268:2091322 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:2091268:2091322 [1] NCCL INFO Using network Socket +gpub085:2091268:2091322 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub085:2091268:2091322 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub085:2091268:2091322 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub085:2091268:2091322 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub085:2091268:2091322 [1] NCCL INFO Connected all rings +gpub085:2091268:2091322 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub085:2091268:2091322 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub085:2091268:2091322 [1] NCCL INFO Connected all trees +gpub085:2091268:2091322 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub085:2091268:2091322 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:2091268:2091322 [1] NCCL INFO comm 0x135025c0 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub085:2091269:2091269 [2] NCCL INFO cudaDriverVersion 12020 +gpub085:2091269:2091269 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:2091269:2091269 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:2091269:2091323 [2] NCCL INFO NET/IB : No device found. 
+gpub085:2091269:2091323 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:2091269:2091323 [2] NCCL INFO Using network Socket +gpub085:2091269:2091323 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub085:2091269:2091323 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub085:2091269:2091323 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub085:2091269:2091323 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub085:2091269:2091323 [2] NCCL INFO Connected all rings +gpub085:2091269:2091323 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub085:2091269:2091323 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub085:2091269:2091323 [2] NCCL INFO Connected all trees +gpub085:2091269:2091323 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub085:2091269:2091323 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:2091269:2091323 [2] NCCL INFO comm 0x1312b960 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub085:2091270:2091270 [3] NCCL INFO cudaDriverVersion 12020 +gpub085:2091270:2091270 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:2091270:2091270 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:2091270:2091324 [3] NCCL INFO NET/IB : No device found. 
+gpub085:2091270:2091324 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:2091270:2091324 [3] NCCL INFO Using network Socket +gpub085:2091270:2091324 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub085:2091270:2091324 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub085:2091270:2091324 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub085:2091270:2091324 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub085:2091270:2091324 [3] NCCL INFO Connected all rings +gpub085:2091270:2091324 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub085:2091270:2091324 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub085:2091270:2091324 [3] NCCL INFO Connected all trees +gpub085:2091270:2091324 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub085:2091270:2091324 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:2091270:2091324 [3] NCCL INFO comm 0x16005160 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub062:1370886:1370886 [3] NCCL INFO cudaDriverVersion 12020 +gpub062:1370886:1370886 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1370886:1370886 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:1370886:1370941 [3] NCCL INFO NET/IB : No device found. 
+gpub062:1370886:1370941 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1370886:1370941 [3] NCCL INFO Using network Socket +gpub062:1370886:1370941 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub062:1370886:1370941 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub062:1370886:1370941 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub062:1370886:1370941 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub062:1370886:1370941 [3] NCCL INFO Connected all rings +gpub062:1370886:1370941 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub062:1370886:1370941 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub062:1370886:1370941 [3] NCCL INFO Connected all trees +gpub062:1370886:1370941 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1370886:1370941 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1370886:1370941 [3] NCCL INFO comm 0x17dedec0 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub062:1370884:1370884 [1] NCCL INFO cudaDriverVersion 12020 +gpub062:1370884:1370884 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1370884:1370884 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:1370884:1370942 [1] NCCL INFO NET/IB : No device found. 
+gpub062:1370884:1370942 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1370884:1370942 [1] NCCL INFO Using network Socket +gpub062:1370884:1370942 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub062:1370884:1370942 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub062:1370884:1370942 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub062:1370884:1370942 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub062:1370884:1370942 [1] NCCL INFO Connected all rings +gpub062:1370884:1370942 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/Socket/1 +gpub062:1370884:1370942 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/Socket/1 +gpub062:1370884:1370942 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub062:1370884:1370942 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub062:1370884:1370942 [1] NCCL INFO Connected all trees +gpub062:1370884:1370942 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1370884:1370942 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1370884:1370942 [1] NCCL INFO comm 0x18b7c9b0 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub085:2091267:2091267 [0] NCCL INFO cudaDriverVersion 12020 +gpub085:2091267:2091267 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:2091267:2091267 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:2091267:2091325 [0] NCCL INFO NET/IB : No device found. 
+gpub085:2091267:2091325 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:2091267:2091325 [0] NCCL INFO Using network Socket +gpub085:2091267:2091325 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub085:2091267:2091325 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub085:2091267:2091325 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub085:2091267:2091325 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub085:2091267:2091325 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub085:2091267:2091325 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub085:2091267:2091325 [0] NCCL INFO Connected all rings +gpub085:2091267:2091325 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub085:2091267:2091325 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub085:2091267:2091325 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/Socket/1 +gpub085:2091267:2091325 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/Socket/1 +gpub085:2091267:2091325 [0] NCCL INFO Connected all trees +gpub085:2091267:2091325 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub085:2091267:2091325 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:2091267:2091325 [0] NCCL INFO comm 0x155be520 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub062:1370883:1370883 [0] NCCL INFO cudaDriverVersion 12020 +gpub062:1370883:1370883 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:1370883:1370883 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:1370883:1370940 [0] NCCL INFO NET/IB : No device found. 
+gpub062:1370883:1370940 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:1370883:1370940 [0] NCCL INFO Using network Socket +gpub062:1370883:1370940 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub062:1370883:1370940 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub062:1370883:1370940 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub062:1370883:1370940 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub062:1370883:1370940 [0] NCCL INFO Connected all rings +gpub062:1370883:1370940 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/Socket/1 +gpub062:1370883:1370940 [0] NCCL INFO Connected all trees +gpub062:1370883:1370940 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub062:1370883:1370940 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:1370883:1370940 [0] NCCL INFO comm 0x16a32520 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub058:1925470:1925470 [2] NCCL INFO cudaDriverVersion 12020 +gpub058:1925470:1925470 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1925470:1925470 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpub058:1925470:1925523 [2] NCCL INFO NET/IB : No device found. +gpub058:1925470:1925523 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:1925470:1925523 [2] NCCL INFO Using network Socket +gpub058:1925470:1925523 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub058:1925470:1925523 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub058:1925470:1925523 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub058:1925470:1925523 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub058:1925470:1925523 [2] NCCL INFO Connected all rings +gpub058:1925470:1925523 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub058:1925470:1925523 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub058:1925470:1925523 [2] NCCL INFO Connected all trees +gpub058:1925470:1925523 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:1925470:1925523 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1925470:1925523 [2] NCCL INFO comm 0x1cf649b0 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:1925471:1925471 [3] NCCL INFO cudaDriverVersion 12020 +gpub058:1925471:1925471 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1925471:1925471 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1925471:1925524 [3] NCCL INFO NET/IB : No device found. 
+gpub058:1925471:1925524 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:1925471:1925524 [3] NCCL INFO Using network Socket +gpub058:1925471:1925524 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub058:1925471:1925524 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub058:1925471:1925524 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub058:1925471:1925524 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub058:1925471:1925524 [3] NCCL INFO Connected all rings +gpub058:1925471:1925524 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub058:1925471:1925524 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub058:1925471:1925524 [3] NCCL INFO Connected all trees +gpub058:1925471:1925524 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:1925471:1925524 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1925471:1925524 [3] NCCL INFO comm 0x1a09fc10 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub022:1741722:1741722 [2] NCCL INFO cudaDriverVersion 12020 +gpub022:1741722:1741722 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1741722:1741722 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1741722:1741773 [2] NCCL INFO NET/IB : No device found. 
+gpub022:1741722:1741773 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1741722:1741773 [2] NCCL INFO Using network Socket +gpub022:1741722:1741773 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:1741722:1741773 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub022:1741722:1741773 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub022:1741722:1741773 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub022:1741722:1741773 [2] NCCL INFO Connected all rings +gpub022:1741722:1741773 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub058:1925468:1925468 [0] NCCL INFO cudaDriverVersion 12020 +gpub058:1925468:1925468 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1925468:1925468 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1925468:1925525 [0] NCCL INFO NET/IB : No device found. +gpub058:1925468:1925525 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:1925468:1925525 [0] NCCL INFO Using network Socket +gpub058:1925468:1925525 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub058:1925468:1925525 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub058:1925468:1925525 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub022:1741722:1741773 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub022:1741722:1741773 [2] NCCL INFO Connected all trees +gpub022:1741722:1741773 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub022:1741722:1741773 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1741722:1741773 [2] 
NCCL INFO comm 0x15a46340 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:1925468:1925525 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub058:1925468:1925525 [0] NCCL INFO Connected all rings +gpub058:1925468:1925525 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/Socket/1 +gpub058:1925468:1925525 [0] NCCL INFO Connected all trees +gpub058:1925468:1925525 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:1925468:1925525 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1925468:1925525 [0] NCCL INFO comm 0x1822ca70 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub058:1925469:1925469 [1] NCCL INFO cudaDriverVersion 12020 +gpub058:1925469:1925469 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1925469:1925469 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1925469:1925526 [1] NCCL INFO NET/IB : No device found. 
+gpub058:1925469:1925526 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:1925469:1925526 [1] NCCL INFO Using network Socket +gpub058:1925469:1925526 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub058:1925469:1925526 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub058:1925469:1925526 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub058:1925469:1925526 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub058:1925469:1925526 [1] NCCL INFO Connected all rings +gpub058:1925469:1925526 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/Socket/1 +gpub058:1925469:1925526 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/Socket/1 +gpub058:1925469:1925526 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub058:1925469:1925526 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub058:1925469:1925526 [1] NCCL INFO Connected all trees +gpub058:1925469:1925526 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:1925469:1925526 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1925469:1925526 [1] NCCL INFO comm 0x18578770 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub022:1741720:1741770 [0] NCCL INFO NET/IB : No device found. 
+gpub022:1741720:1741770 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1741720:1741770 [0] NCCL INFO Using network Socket +gpub022:1741720:1741770 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:1741720:1741770 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub022:1741720:1741770 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub022:1741720:1741770 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub022:1741720:1741770 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub022:1741720:1741770 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub022:1741720:1741770 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub022:1741720:1741770 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub022:1741720:1741770 [0] NCCL INFO Connected all rings +gpub022:1741720:1741770 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/Socket/1 +gpub022:1741720:1741770 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub022:1741720:1741770 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/Socket/1 +gpub022:1741720:1741770 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub022:1741720:1741770 [0] NCCL INFO Connected all trees +gpub022:1741720:1741770 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub022:1741720:1741770 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1741720:1741770 [0] NCCL INFO comm 0x14a37600 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:1741721:1741721 [1] NCCL INFO cudaDriverVersion 12020 +gpub022:1741721:1741721 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1741721:1741721 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1741721:1741772 [1] NCCL INFO 
NET/IB : No device found. +gpub022:1741721:1741772 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1741721:1741772 [1] NCCL INFO Using network Socket +gpub022:1741721:1741772 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:1741721:1741772 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub022:1741721:1741772 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub022:1741721:1741772 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub022:1741721:1741772 [1] NCCL INFO Connected all rings +gpub022:1741721:1741772 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub022:1741721:1741772 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub022:1741721:1741772 [1] NCCL INFO Connected all trees +gpub022:1741721:1741772 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub022:1741721:1741772 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1741721:1741772 [1] NCCL INFO comm 0x13440400 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub022:1741723:1741723 [3] NCCL INFO cudaDriverVersion 12020 +gpub022:1741723:1741723 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1741723:1741723 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1741723:1741771 [3] NCCL INFO NET/IB : No device found. 
+gpub022:1741723:1741771 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1741723:1741771 [3] NCCL INFO Using network Socket +gpub022:1741723:1741771 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:1741723:1741771 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub022:1741723:1741771 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub022:1741723:1741771 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub022:1741723:1741771 [3] NCCL INFO Connected all rings +gpub022:1741723:1741771 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub022:1741723:1741771 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub022:1741723:1741771 [3] NCCL INFO Connected all trees +gpub022:1741723:1741771 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub022:1741723:1741771 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1741723:1741771 [3] NCCL INFO comm 0xf49a900 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +[gpub022:0/16] 2024-02-01 11:07:00,776 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub022:0/16] 2024-02-01 11:09:51,496 (trainer:737) INFO: 18epoch:train:1-100batch: iter_time=4.046, forward_time=0.519, loss_ctc=46.466, loss_att=47.483, acc=0.742, loss=47.178, backward_time=0.455, grad_norm=31.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.425e-04, train_time=7.147 +[gpub022:0/16] 2024-02-01 11:12:45,266 (trainer:737) INFO: 18epoch:train:101-200batch: iter_time=4.226e-04, forward_time=0.415, loss_ctc=54.999, loss_att=52.041, acc=0.726, loss=52.928, backward_time=0.455, grad_norm=33.639, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.425e-04, train_time=1.753 +[gpub022:0/16] 2024-02-01 11:15:46,586 (trainer:737) INFO: 18epoch:train:201-300batch: iter_time=0.001, forward_time=0.496, loss_ctc=45.115, loss_att=40.852, acc=0.730, loss=42.131, backward_time=0.488, grad_norm=31.469, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=2.424e-04, train_time=1.813 +[gpub022:0/16] 2024-02-01 11:18:55,440 (trainer:737) INFO: 18epoch:train:301-400batch: iter_time=1.043e-04, forward_time=0.474, loss_ctc=54.709, loss_att=51.740, acc=0.727, loss=52.630, backward_time=0.458, grad_norm=35.145, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.424e-04, train_time=1.888 +[gpub022:0/16] 2024-02-01 11:21:48,449 (trainer:737) INFO: 18epoch:train:401-500batch: iter_time=6.817e-04, forward_time=0.523, loss_ctc=63.351, loss_att=53.063, acc=0.731, loss=56.149, backward_time=0.484, grad_norm=57.372, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.423e-04, train_time=1.731 +[gpub022:0/16] 2024-02-01 11:24:38,360 (trainer:737) INFO: 18epoch:train:501-600batch: iter_time=3.971e-04, forward_time=0.445, loss_ctc=51.655, loss_att=56.947, acc=0.716, loss=55.359, backward_time=0.446, grad_norm=35.599, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.423e-04, train_time=1.699 +[gpub022:0/16] 2024-02-01 11:27:29,655 (trainer:737) INFO: 
18epoch:train:601-700batch: iter_time=4.565e-04, forward_time=0.546, loss_ctc=62.433, loss_att=51.183, acc=0.734, loss=54.558, backward_time=0.462, grad_norm=39.300, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.422e-04, train_time=1.712 +[gpub022:0/16] 2024-02-01 11:30:17,698 (trainer:737) INFO: 18epoch:train:701-800batch: iter_time=2.025e-04, forward_time=0.446, loss_ctc=50.113, loss_att=49.351, acc=0.735, loss=49.580, backward_time=0.433, grad_norm=33.964, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.422e-04, train_time=1.681 +[gpub022:0/16] 2024-02-01 11:33:02,286 (trainer:737) INFO: 18epoch:train:801-900batch: iter_time=5.816e-04, forward_time=0.391, loss_ctc=59.098, loss_att=52.623, acc=0.734, loss=54.566, backward_time=0.477, grad_norm=38.176, clip=100.000, loss_scale=9.242e+33, optim_step_time=0.103, optim0_lr0=2.421e-04, train_time=1.646 +[gpub022:0/16] 2024-02-01 11:35:43,414 (trainer:737) INFO: 18epoch:train:901-1000batch: iter_time=4.848e-04, forward_time=0.365, loss_ctc=50.237, loss_att=49.477, acc=0.745, loss=49.705, backward_time=0.493, grad_norm=32.129, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.421e-04, train_time=1.611 +[gpub022:0/16] 2024-02-01 11:38:43,681 (trainer:737) INFO: 18epoch:train:1001-1100batch: iter_time=4.796e-04, forward_time=0.403, loss_ctc=56.200, loss_att=55.943, acc=0.729, loss=56.020, backward_time=0.464, grad_norm=36.639, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.420e-04, train_time=1.801 +[gpub022:0/16] 2024-02-01 11:41:37,382 (trainer:737) INFO: 18epoch:train:1101-1200batch: iter_time=2.010e-04, forward_time=0.418, loss_ctc=48.253, loss_att=44.765, acc=0.719, loss=45.811, backward_time=0.448, grad_norm=34.894, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.420e-04, train_time=1.738 +[gpub022:0/16] 2024-02-01 11:43:11,742 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub022:0/16] 2024-02-01 11:43:30,286 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 11:43:33,923 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 11:43:33,923 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub022:0/16] 2024-02-01 11:43:33,926 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 11:53:24,853 (trainer:737) INFO: 18epoch:train:1201-1300batch: iter_time=3.383, forward_time=0.433, loss_ctc=45.716, loss_att=47.383, acc=0.727, loss=46.883, backward_time=0.451, grad_norm=31.351, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.420e-04, train_time=7.075 +[gpub022:0/16] 2024-02-01 11:56:37,030 (trainer:737) INFO: 18epoch:train:1301-1400batch: iter_time=8.825e-05, forward_time=0.386, loss_ctc=53.141, loss_att=48.761, acc=0.745, loss=50.075, backward_time=0.457, grad_norm=31.905, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.419e-04, train_time=1.922 +[gpub022:0/16] 2024-02-01 11:59:24,365 (trainer:737) INFO: 18epoch:train:1401-1500batch: iter_time=9.048e-05, forward_time=0.377, loss_ctc=48.752, loss_att=44.621, acc=0.742, loss=45.860, backward_time=0.470, grad_norm=30.073, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=2.419e-04, train_time=1.673 +[gpub022:0/16] 2024-02-01 12:02:10,692 (trainer:737) INFO: 
18epoch:train:1501-1600batch: iter_time=9.158e-05, forward_time=0.368, loss_ctc=43.387, loss_att=38.956, acc=0.727, loss=40.285, backward_time=0.449, grad_norm=32.587, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.418e-04, train_time=1.663 +[gpub022:0/16] 2024-02-01 12:05:33,236 (trainer:737) INFO: 18epoch:train:1601-1700batch: iter_time=2.274e-04, forward_time=0.514, loss_ctc=56.705, loss_att=55.931, acc=0.727, loss=56.163, backward_time=0.475, grad_norm=47.712, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=2.418e-04, train_time=2.025 +[gpub022:0/16] 2024-02-01 12:08:32,824 (trainer:737) INFO: 18epoch:train:1701-1800batch: iter_time=0.001, forward_time=0.395, loss_ctc=58.414, loss_att=51.942, acc=0.739, loss=53.883, backward_time=0.474, grad_norm=34.973, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.417e-04, train_time=1.795 +[gpub022:0/16] 2024-02-01 12:12:10,647 (trainer:737) INFO: 18epoch:train:1801-1900batch: iter_time=5.196e-04, forward_time=0.573, loss_ctc=52.384, loss_att=50.784, acc=0.735, loss=51.264, backward_time=0.496, grad_norm=35.042, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.111, optim0_lr0=2.417e-04, train_time=2.179 +[gpub022:0/16] 2024-02-01 12:15:08,305 (trainer:737) INFO: 18epoch:train:1901-2000batch: iter_time=3.750e-04, forward_time=0.385, loss_ctc=55.973, loss_att=48.891, acc=0.732, loss=51.016, backward_time=0.481, grad_norm=36.302, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.416e-04, train_time=1.776 +[gpub022:0/16] 2024-02-01 12:17:50,982 (trainer:737) INFO: 18epoch:train:2001-2100batch: iter_time=2.930e-04, forward_time=0.412, loss_ctc=55.744, loss_att=55.374, acc=0.736, loss=55.485, backward_time=0.466, grad_norm=36.727, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.416e-04, train_time=1.627 +[gpub022:0/16] 2024-02-01 12:20:45,876 (trainer:737) INFO: 18epoch:train:2101-2200batch: 
iter_time=8.576e-05, forward_time=0.401, loss_ctc=49.662, loss_att=47.217, acc=0.734, loss=47.950, backward_time=0.439, grad_norm=33.168, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.415e-04, train_time=1.750 +[gpub022:0/16] 2024-02-01 12:23:45,744 (trainer:737) INFO: 18epoch:train:2201-2300batch: iter_time=4.022e-04, forward_time=0.436, loss_ctc=54.243, loss_att=50.634, acc=0.753, loss=51.717, backward_time=0.441, grad_norm=33.689, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.415e-04, train_time=1.798 +[gpub022:0/16] 2024-02-01 12:25:20,805 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-01 12:26:52,687 (trainer:737) INFO: 18epoch:train:2301-2400batch: iter_time=4.990e-04, forward_time=0.409, loss_ctc=54.007, loss_att=55.198, acc=0.712, loss=54.841, backward_time=0.442, grad_norm=40.621, clip=100.000, loss_scale=8.024e+33, optim_step_time=0.099, optim0_lr0=2.414e-04, train_time=1.869 +[gpub022:0/16] 2024-02-01 12:29:40,107 (trainer:737) INFO: 18epoch:train:2401-2500batch: iter_time=3.334e-04, forward_time=0.425, loss_ctc=44.624, loss_att=42.682, acc=0.732, loss=43.265, backward_time=0.443, grad_norm=29.779, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.414e-04, train_time=1.674 +[gpub022:0/16] 2024-02-01 12:30:00,430 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub022:0/16] 2024-02-01 12:30:19,175 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 12:30:23,037 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 12:30:23,037 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub022:0/16] 2024-02-01 12:30:23,042 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 12:41:34,413 (trainer:737) INFO: 18epoch:train:2501-2600batch: iter_time=3.332, forward_time=0.319, loss_ctc=45.704, loss_att=47.858, acc=0.734, loss=47.212, backward_time=0.446, grad_norm=32.112, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.413e-04, train_time=7.144 +[gpub022:0/16] 2024-02-01 12:44:47,806 (trainer:737) INFO: 18epoch:train:2601-2700batch: iter_time=9.059e-05, forward_time=0.399, loss_ctc=53.511, loss_att=50.016, acc=0.719, loss=51.065, backward_time=0.421, grad_norm=32.100, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.413e-04, train_time=1.934 +[gpub022:0/16] 2024-02-01 12:47:46,692 (trainer:737) INFO: 18epoch:train:2701-2800batch: iter_time=9.547e-05, forward_time=0.403, loss_ctc=42.835, loss_att=39.133, acc=0.736, loss=40.244, backward_time=0.457, grad_norm=31.739, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.412e-04, train_time=1.788 +[gpub022:0/16] 2024-02-01 12:50:36,093 (trainer:737) INFO: 
18epoch:train:2801-2900batch: iter_time=5.605e-04, forward_time=0.413, loss_ctc=53.035, loss_att=51.078, acc=0.721, loss=51.665, backward_time=0.441, grad_norm=34.700, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.412e-04, train_time=1.694 +[gpub022:0/16] 2024-02-01 12:53:04,964 (trainer:737) INFO: 18epoch:train:2901-3000batch: iter_time=4.883e-04, forward_time=0.321, loss_ctc=57.212, loss_att=51.049, acc=0.728, loss=52.898, backward_time=0.409, grad_norm=44.774, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.412e-04, train_time=1.488 +[gpub022:0/16] 2024-02-01 12:55:51,580 (trainer:737) INFO: 18epoch:train:3001-3100batch: iter_time=9.796e-05, forward_time=0.400, loss_ctc=50.235, loss_att=56.004, acc=0.711, loss=54.274, backward_time=0.477, grad_norm=35.356, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.411e-04, train_time=1.667 +[gpub022:0/16] 2024-02-01 12:58:50,628 (trainer:737) INFO: 18epoch:train:3101-3200batch: iter_time=2.460e-04, forward_time=0.308, loss_ctc=59.679, loss_att=48.622, acc=0.737, loss=51.939, backward_time=0.413, grad_norm=37.046, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.411e-04, train_time=1.790 +[gpub022:0/16] 2024-02-01 13:01:48,985 (trainer:737) INFO: 18epoch:train:3201-3300batch: iter_time=6.294e-04, forward_time=0.400, loss_ctc=48.024, loss_att=46.648, acc=0.728, loss=47.061, backward_time=0.453, grad_norm=32.684, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.410e-04, train_time=1.783 +[gpub022:0/16] 2024-02-01 13:04:34,725 (trainer:737) INFO: 18epoch:train:3301-3400batch: iter_time=9.824e-05, forward_time=0.380, loss_ctc=56.817, loss_att=51.441, acc=0.730, loss=53.054, backward_time=0.458, grad_norm=35.837, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.410e-04, train_time=1.657 +[gpub022:0/16] 2024-02-01 13:07:21,297 (trainer:737) INFO: 18epoch:train:3401-3500batch: 
iter_time=7.920e-04, forward_time=0.379, loss_ctc=48.926, loss_att=46.838, acc=0.741, loss=47.465, backward_time=0.454, grad_norm=32.731, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.409e-04, train_time=1.666 +[gpub022:0/16] 2024-02-01 13:10:10,638 (trainer:737) INFO: 18epoch:train:3501-3600batch: iter_time=1.050e-04, forward_time=0.309, loss_ctc=55.158, loss_att=55.168, acc=0.725, loss=55.165, backward_time=0.406, grad_norm=37.344, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.409e-04, train_time=1.693 +[gpub022:0/16] 2024-02-01 13:12:52,135 (trainer:737) INFO: 18epoch:train:3601-3700batch: iter_time=4.880e-04, forward_time=0.377, loss_ctc=46.553, loss_att=43.691, acc=0.716, loss=44.549, backward_time=0.484, grad_norm=34.937, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.408e-04, train_time=1.615 +[gpub022:0/16] 2024-02-01 13:14:32,584 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub022:0/16] 2024-02-01 13:14:51,182 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 13:14:54,680 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 13:14:54,680 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub022:0/16] 2024-02-01 13:14:54,685 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 13:24:29,092 
(trainer:737) INFO: 18epoch:train:3701-3800batch: iter_time=5.151, forward_time=0.471, loss_ctc=44.719, loss_att=48.120, acc=0.714, loss=47.100, backward_time=0.458, grad_norm=32.221, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.408e-04, train_time=6.969 +[gpub022:0/16] 2024-02-01 13:28:02,099 (trainer:737) INFO: 18epoch:train:3801-3900batch: iter_time=0.002, forward_time=0.611, loss_ctc=51.734, loss_att=46.755, acc=0.738, loss=48.249, backward_time=0.515, grad_norm=32.531, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=2.407e-04, train_time=2.130 +[gpub022:0/16] 2024-02-01 13:31:14,981 (trainer:737) INFO: 18epoch:train:3901-4000batch: iter_time=4.878e-04, forward_time=0.476, loss_ctc=48.361, loss_att=44.549, acc=0.735, loss=45.693, backward_time=0.537, grad_norm=30.835, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=2.407e-04, train_time=1.928 +[gpub022:0/16] 2024-02-01 13:34:31,168 (trainer:737) INFO: 18epoch:train:4001-4100batch: iter_time=3.124e-04, forward_time=0.601, loss_ctc=42.754, loss_att=39.246, acc=0.721, loss=40.299, backward_time=0.571, grad_norm=31.523, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.117, optim0_lr0=2.406e-04, train_time=1.962 +[gpub022:0/16] 2024-02-01 13:37:38,124 (trainer:737) INFO: 18epoch:train:4101-4200batch: iter_time=6.845e-04, forward_time=0.629, loss_ctc=55.222, loss_att=54.130, acc=0.724, loss=54.458, backward_time=0.514, grad_norm=41.521, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=2.406e-04, train_time=1.869 +[gpub022:0/16] 2024-02-01 13:40:36,200 (trainer:737) INFO: 18epoch:train:4201-4300batch: iter_time=2.561e-04, forward_time=0.536, loss_ctc=56.510, loss_att=51.163, acc=0.736, loss=52.767, backward_time=0.487, grad_norm=35.046, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.114, optim0_lr0=2.406e-04, train_time=1.780 +[gpub022:0/16] 2024-02-01 13:43:46,179 (trainer:737) INFO: 18epoch:train:4301-4400batch: 
iter_time=7.856e-04, forward_time=0.543, loss_ctc=51.580, loss_att=49.804, acc=0.729, loss=50.337, backward_time=0.539, grad_norm=33.734, clip=100.000, loss_scale=7.529e+33, optim_step_time=0.109, optim0_lr0=2.405e-04, train_time=1.900 +[gpub022:0/16] 2024-02-01 13:46:47,387 (trainer:737) INFO: 18epoch:train:4401-4500batch: iter_time=4.705e-04, forward_time=0.531, loss_ctc=53.390, loss_att=47.620, acc=0.726, loss=49.351, backward_time=0.523, grad_norm=35.965, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=2.405e-04, train_time=1.812 +[gpub022:0/16] 2024-02-01 13:49:51,823 (trainer:737) INFO: 18epoch:train:4501-4600batch: iter_time=7.254e-04, forward_time=0.524, loss_ctc=54.679, loss_att=53.386, acc=0.734, loss=53.774, backward_time=0.487, grad_norm=36.717, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=2.404e-04, train_time=1.844 +[gpub022:0/16] 2024-02-01 13:52:43,675 (trainer:737) INFO: 18epoch:train:4601-4700batch: iter_time=4.744e-04, forward_time=0.496, loss_ctc=48.621, loss_att=45.977, acc=0.724, loss=46.770, backward_time=0.450, grad_norm=31.911, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.404e-04, train_time=1.718 +[gpub022:0/16] 2024-02-01 13:56:05,786 (trainer:737) INFO: 18epoch:train:4701-4800batch: iter_time=5.492e-04, forward_time=0.525, loss_ctc=54.049, loss_att=48.727, acc=0.757, loss=50.323, backward_time=0.505, grad_norm=33.531, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=2.403e-04, train_time=2.021 +[gpub022:0/16] 2024-02-01 13:59:03,660 (trainer:737) INFO: 18epoch:train:4801-4900batch: iter_time=0.001, forward_time=0.560, loss_ctc=52.783, loss_att=54.046, acc=0.706, loss=53.667, backward_time=0.499, grad_norm=38.868, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.110, optim0_lr0=2.403e-04, train_time=1.778 +[gpub022:0/16] 2024-02-01 14:01:19,229 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-01 14:01:46,705 (trainer:737) INFO: 18epoch:train:4901-5000batch: iter_time=4.153e-04, forward_time=0.440, loss_ctc=44.030, loss_att=43.703, acc=0.714, loss=43.801, backward_time=0.452, grad_norm=30.207, clip=100.000, loss_scale=9.336e+33, optim_step_time=0.103, optim0_lr0=2.402e-04, train_time=1.630 +[gpub022:0/16] 2024-02-01 14:02:07,104 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub022:0/16] 2024-02-01 14:02:25,381 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 14:02:28,958 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 14:02:28,958 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub022:0/16] 2024-02-01 14:02:29,167 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 14:10:35,135 (trainer:737) INFO: 18epoch:train:5001-5100batch: iter_time=3.400, forward_time=0.525, loss_ctc=44.726, loss_att=45.727, acc=0.748, loss=45.426, backward_time=0.467, grad_norm=31.678, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.402e-04, train_time=5.285 +[gpub022:0/16] 2024-02-01 14:13:37,812 (trainer:737) INFO: 18epoch:train:5101-5200batch: iter_time=1.998e-04, forward_time=0.542, loss_ctc=52.339, loss_att=50.060, acc=0.735, loss=50.743, backward_time=0.518, grad_norm=30.676, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.123, optim0_lr0=2.401e-04, train_time=1.827 +[gpub022:0/16] 2024-02-01 14:16:16,973 (trainer:737) INFO: 18epoch:train:5201-5300batch: iter_time=5.770e-04, forward_time=0.445, loss_ctc=42.368, loss_att=38.062, acc=0.741, loss=39.354, backward_time=0.454, grad_norm=30.738, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.401e-04, train_time=1.590 +[gpub022:0/16] 2024-02-01 14:18:55,583 (trainer:737) INFO: 18epoch:train:5301-5400batch: iter_time=2.243e-04, forward_time=0.398, loss_ctc=52.167, loss_att=50.368, acc=0.736, loss=50.908, backward_time=0.465, grad_norm=33.531, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.400e-04, train_time=1.586 +[gpub022:0/16] 2024-02-01 14:19:36,396 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-01 14:22:00,491 (trainer:737) INFO: 18epoch:train:5401-5500batch: iter_time=3.262e-04, forward_time=0.440, loss_ctc=56.966, loss_att=50.850, acc=0.738, loss=52.684, backward_time=0.444, grad_norm=43.780, clip=100.000, loss_scale=3.252e+33, optim_step_time=0.100, optim0_lr0=2.400e-04, train_time=1.849 +[gpub022:0/16] 2024-02-01 14:24:41,046 (trainer:737) INFO: 18epoch:train:5501-5600batch: iter_time=2.583e-04, forward_time=0.424, loss_ctc=50.203, loss_att=55.557, acc=0.723, loss=53.951, backward_time=0.463, grad_norm=34.582, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.400e-04, train_time=1.606 +[gpub022:0/16] 2024-02-01 14:27:21,446 (trainer:737) INFO: 18epoch:train:5601-5700batch: iter_time=0.001, forward_time=0.414, loss_ctc=57.978, loss_att=49.854, acc=0.738, loss=52.291, backward_time=0.482, grad_norm=35.675, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.399e-04, train_time=1.604 +[gpub022:0/16] 2024-02-01 14:30:08,500 (trainer:737) INFO: 18epoch:train:5701-5800batch: iter_time=3.693e-04, forward_time=0.424, loss_ctc=48.199, loss_att=47.643, acc=0.744, loss=47.810, 
backward_time=0.473, grad_norm=31.027, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.399e-04, train_time=1.670 +[gpub022:0/16] 2024-02-01 14:32:51,000 (trainer:737) INFO: 18epoch:train:5801-5900batch: iter_time=3.874e-04, forward_time=0.372, loss_ctc=56.319, loss_att=51.664, acc=0.740, loss=53.061, backward_time=0.442, grad_norm=37.910, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.398e-04, train_time=1.625 +[gpub022:0/16] 2024-02-01 14:35:34,752 (trainer:737) INFO: 18epoch:train:5901-6000batch: iter_time=2.337e-04, forward_time=0.531, loss_ctc=48.656, loss_att=48.294, acc=0.751, loss=48.402, backward_time=0.459, grad_norm=31.391, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.398e-04, train_time=1.637 +[gpub022:0/16] 2024-02-01 14:38:16,940 (trainer:737) INFO: 18epoch:train:6001-6100batch: iter_time=4.129e-04, forward_time=0.384, loss_ctc=54.557, loss_att=55.376, acc=0.733, loss=55.131, backward_time=0.455, grad_norm=35.836, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.397e-04, train_time=1.622 +[gpub022:0/16] 2024-02-01 14:41:03,811 (trainer:737) INFO: 18epoch:train:6101-6200batch: iter_time=1.770e-04, forward_time=0.387, loss_ctc=45.659, loss_att=44.386, acc=0.724, loss=44.767, backward_time=0.428, grad_norm=33.532, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=2.397e-04, train_time=1.668 +[gpub022:0/16] 2024-02-01 14:42:39,827 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub022:0/16] 2024-02-01 14:42:58,303 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 14:43:02,367 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 14:43:02,368 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub022:0/16] 2024-02-01 14:43:02,371 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 14:51:41,179 (trainer:737) INFO: 18epoch:train:6201-6300batch: iter_time=3.506, forward_time=0.341, loss_ctc=44.400, loss_att=47.922, acc=0.726, loss=46.866, backward_time=0.410, grad_norm=32.011, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.396e-04, train_time=6.374 +[gpub022:0/16] 2024-02-01 14:54:21,033 (trainer:737) INFO: 18epoch:train:6301-6400batch: iter_time=8.929e-05, forward_time=0.295, loss_ctc=51.706, loss_att=47.537, acc=0.751, loss=48.787, backward_time=0.405, grad_norm=31.924, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.396e-04, train_time=1.598 +[gpub022:0/16] 2024-02-01 14:57:15,751 (trainer:737) INFO: 18epoch:train:6401-6500batch: iter_time=8.671e-05, forward_time=0.294, loss_ctc=48.551, loss_att=44.087, acc=0.745, loss=45.427, backward_time=0.405, grad_norm=30.907, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.395e-04, train_time=1.747 +[gpub022:0/16] 2024-02-01 15:00:01,020 (trainer:737) INFO: 
18epoch:train:6501-6600batch: iter_time=7.717e-04, forward_time=0.382, loss_ctc=42.283, loss_att=37.509, acc=0.735, loss=38.941, backward_time=0.436, grad_norm=30.969, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=2.395e-04, train_time=1.652 +[gpub022:0/16] 2024-02-01 15:02:28,519 (trainer:737) INFO: 18epoch:train:6601-6700batch: iter_time=8.786e-05, forward_time=0.292, loss_ctc=55.225, loss_att=55.297, acc=0.731, loss=55.275, backward_time=0.406, grad_norm=43.972, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.394e-04, train_time=1.474 +[gpub022:0/16] 2024-02-01 15:05:02,068 (trainer:737) INFO: 18epoch:train:6701-6800batch: iter_time=8.663e-05, forward_time=0.296, loss_ctc=56.050, loss_att=50.976, acc=0.742, loss=52.498, backward_time=0.403, grad_norm=37.400, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.394e-04, train_time=1.536 +[gpub022:0/16] 2024-02-01 15:07:52,357 (trainer:737) INFO: 18epoch:train:6801-6900batch: iter_time=8.609e-05, forward_time=0.299, loss_ctc=50.304, loss_att=49.199, acc=0.740, loss=49.530, backward_time=0.404, grad_norm=32.537, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.394e-04, train_time=1.703 +[gpub022:0/16] 2024-02-01 15:10:45,862 (trainer:737) INFO: 18epoch:train:6901-7000batch: iter_time=3.008e-04, forward_time=0.430, loss_ctc=52.460, loss_att=48.139, acc=0.735, loss=49.435, backward_time=0.451, grad_norm=33.127, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.393e-04, train_time=1.735 +[gpub022:0/16] 2024-02-01 15:13:04,459 (trainer:737) INFO: 18epoch:train:7001-7100batch: iter_time=8.972e-05, forward_time=0.292, loss_ctc=54.641, loss_att=54.628, acc=0.740, loss=54.632, backward_time=0.404, grad_norm=36.980, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.393e-04, train_time=1.385 +[gpub022:0/16] 2024-02-01 15:15:33,325 (trainer:737) INFO: 18epoch:train:7101-7200batch: 
iter_time=8.448e-05, forward_time=0.293, loss_ctc=48.606, loss_att=46.359, acc=0.740, loss=47.033, backward_time=0.404, grad_norm=30.737, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.392e-04, train_time=1.489 +[gpub022:0/16] 2024-02-01 15:18:27,163 (trainer:737) INFO: 18epoch:train:7201-7300batch: iter_time=8.927e-05, forward_time=0.316, loss_ctc=53.661, loss_att=50.104, acc=0.758, loss=51.171, backward_time=0.409, grad_norm=32.791, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.392e-04, train_time=1.738 +[gpub022:0/16] 2024-02-01 15:21:23,995 (trainer:737) INFO: 18epoch:train:7301-7400batch: iter_time=3.474e-04, forward_time=0.430, loss_ctc=52.210, loss_att=54.493, acc=0.714, loss=53.808, backward_time=0.442, grad_norm=38.626, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.391e-04, train_time=1.768 +[gpub022:0/16] 2024-02-01 15:23:38,699 (trainer:737) INFO: 18epoch:train:7401-7500batch: iter_time=1.009e-04, forward_time=0.288, loss_ctc=43.496, loss_att=42.993, acc=0.733, loss=43.144, backward_time=0.403, grad_norm=30.973, clip=100.000, loss_scale=4.517e+33, optim_step_time=0.092, optim0_lr0=2.391e-04, train_time=1.346 +[gpub022:0/16] 2024-02-01 15:23:58,728 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub022:0/16] 2024-02-01 15:24:17,662 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 15:24:21,190 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 15:24:21,190 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub022:0/16] 2024-02-01 15:24:21,193 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 15:32:01,290 (trainer:737) INFO: 18epoch:train:7501-7600batch: iter_time=3.267, forward_time=0.293, loss_ctc=44.966, loss_att=47.270, acc=0.737, loss=46.579, backward_time=0.407, grad_norm=34.502, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.390e-04, train_time=5.027 +[gpub022:0/16] 2024-02-01 15:34:33,752 (trainer:737) INFO: 18epoch:train:7601-7700batch: iter_time=8.055e-05, forward_time=0.420, loss_ctc=52.584, loss_att=49.237, acc=0.723, loss=50.241, backward_time=0.444, grad_norm=33.461, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.390e-04, train_time=1.524 +[gpub022:0/16] 2024-02-01 15:37:05,908 (trainer:737) INFO: 18epoch:train:7701-7800batch: iter_time=8.664e-05, forward_time=0.291, loss_ctc=42.649, loss_att=38.348, acc=0.742, loss=39.638, backward_time=0.403, grad_norm=29.991, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.389e-04, train_time=1.521 +[gpub022:0/16] 2024-02-01 15:39:32,951 (trainer:737) INFO: 
18epoch:train:7801-7900batch: iter_time=8.901e-05, forward_time=0.295, loss_ctc=51.709, loss_att=50.120, acc=0.725, loss=50.597, backward_time=0.404, grad_norm=33.836, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.389e-04, train_time=1.470 +[gpub022:0/16] 2024-02-01 15:41:48,242 (trainer:737) INFO: 18epoch:train:7901-8000batch: iter_time=9.018e-05, forward_time=0.301, loss_ctc=56.787, loss_att=50.953, acc=0.731, loss=52.703, backward_time=0.405, grad_norm=45.857, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.389e-04, train_time=1.353 +[gpub022:0/16] 2024-02-01 15:44:42,132 (trainer:737) INFO: 18epoch:train:8001-8100batch: iter_time=6.428e-04, forward_time=0.408, loss_ctc=50.156, loss_att=55.390, acc=0.713, loss=53.820, backward_time=0.468, grad_norm=36.054, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.388e-04, train_time=1.739 +[gpub022:0/16] 2024-02-01 15:47:14,207 (trainer:737) INFO: 18epoch:train:8101-8200batch: iter_time=9.009e-05, forward_time=0.291, loss_ctc=56.807, loss_att=48.024, acc=0.741, loss=50.659, backward_time=0.404, grad_norm=34.794, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.388e-04, train_time=1.520 +[gpub022:0/16] 2024-02-01 15:49:32,432 (trainer:737) INFO: 18epoch:train:8201-8300batch: iter_time=7.945e-05, forward_time=0.321, loss_ctc=47.885, loss_att=46.122, acc=0.731, loss=46.650, backward_time=0.403, grad_norm=32.088, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.387e-04, train_time=1.381 +[gpub022:0/16] 2024-02-01 15:52:16,561 (trainer:737) INFO: 18epoch:train:8301-8400batch: iter_time=9.548e-05, forward_time=0.298, loss_ctc=56.331, loss_att=51.528, acc=0.735, loss=52.969, backward_time=0.406, grad_norm=36.903, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.387e-04, train_time=1.642 +[gpub022:0/16] 2024-02-01 15:55:12,494 (trainer:737) INFO: 18epoch:train:8401-8500batch: 
iter_time=8.955e-05, forward_time=0.414, loss_ctc=48.518, loss_att=47.070, acc=0.742, loss=47.504, backward_time=0.455, grad_norm=32.091, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.386e-04, train_time=1.759 +[gpub022:0/16] 2024-02-01 15:57:40,881 (trainer:737) INFO: 18epoch:train:8501-8600batch: iter_time=8.531e-05, forward_time=0.290, loss_ctc=54.279, loss_att=54.709, acc=0.731, loss=54.580, backward_time=0.405, grad_norm=37.891, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.386e-04, train_time=1.483 +[gpub022:0/16] 2024-02-01 16:00:10,064 (trainer:737) INFO: 18epoch:train:8601-8700batch: iter_time=9.312e-05, forward_time=0.294, loss_ctc=45.220, loss_att=44.198, acc=0.714, loss=44.505, backward_time=0.400, grad_norm=33.720, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.385e-04, train_time=1.491 +[gpub022:0/16] 2024-02-01 16:01:40,251 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub022:0/16] 2024-02-01 16:01:59,046 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 16:02:02,495 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 16:02:02,495 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub022:0/16] 2024-02-01 16:02:02,553 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 16:09:00,028 
(trainer:737) INFO: 18epoch:train:8701-8800batch: iter_time=3.672, forward_time=0.362, loss_ctc=44.066, loss_att=47.285, acc=0.717, loss=46.319, backward_time=0.419, grad_norm=31.011, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.385e-04, train_time=5.300 +[gpub022:0/16] 2024-02-01 16:11:15,943 (trainer:737) INFO: 18epoch:train:8801-8900batch: iter_time=7.941e-05, forward_time=0.290, loss_ctc=51.050, loss_att=46.053, acc=0.742, loss=47.552, backward_time=0.405, grad_norm=32.258, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.384e-04, train_time=1.359 +[gpub022:0/16] 2024-02-01 16:13:34,512 (trainer:737) INFO: 18epoch:train:8901-9000batch: iter_time=8.454e-05, forward_time=0.295, loss_ctc=47.489, loss_att=43.743, acc=0.738, loss=44.867, backward_time=0.403, grad_norm=30.626, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.384e-04, train_time=1.385 +[gpub022:0/16] 2024-02-01 16:16:22,864 (trainer:737) INFO: 18epoch:train:9001-9100batch: iter_time=8.157e-05, forward_time=0.379, loss_ctc=42.212, loss_att=38.044, acc=0.727, loss=39.294, backward_time=0.421, grad_norm=31.080, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.384e-04, train_time=1.684 +[gpub022:0/16] 2024-02-01 16:18:43,412 (trainer:737) INFO: 18epoch:train:9101-9200batch: iter_time=8.485e-05, forward_time=0.313, loss_ctc=55.913, loss_att=56.171, acc=0.724, loss=56.093, backward_time=0.406, grad_norm=44.941, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.383e-04, train_time=1.405 +[gpub022:0/16] 2024-02-01 16:20:58,600 (trainer:737) INFO: 18epoch:train:9201-9300batch: iter_time=8.669e-05, forward_time=0.296, loss_ctc=55.014, loss_att=51.309, acc=0.737, loss=52.421, backward_time=0.404, grad_norm=34.195, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.383e-04, train_time=1.351 +[gpub022:0/16] 2024-02-01 16:23:54,196 (trainer:737) INFO: 
18epoch:train:9301-9400batch: iter_time=8.351e-05, forward_time=0.364, loss_ctc=50.429, loss_att=49.259, acc=0.733, loss=49.610, backward_time=0.445, grad_norm=33.047, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.382e-04, train_time=1.756 +[gpub022:0/16] 2024-02-01 16:24:51,025 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-01 16:25:55,999 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-01 16:26:17,992 (trainer:737) INFO: 18epoch:train:9401-9500batch: iter_time=8.326e-05, forward_time=0.289, loss_ctc=51.812, loss_att=46.914, acc=0.730, loss=48.383, backward_time=0.403, grad_norm=33.490, clip=100.000, loss_scale=5.537e+33, optim_step_time=0.092, optim0_lr0=2.382e-04, train_time=1.438 +[gpub022:0/16] 2024-02-01 16:27:25,017 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-01 16:28:36,467 (trainer:737) INFO: 18epoch:train:9501-9600batch: iter_time=8.177e-05, forward_time=0.291, loss_ctc=53.787, loss_att=52.586, acc=0.738, loss=52.946, backward_time=0.404, grad_norm=37.410, clip=100.000, loss_scale=1.888e+33, optim_step_time=0.092, optim0_lr0=2.381e-04, train_time=1.385 +[gpub022:0/16] 2024-02-01 16:31:13,316 (trainer:737) INFO: 18epoch:train:9601-9700batch: iter_time=8.024e-05, forward_time=0.294, loss_ctc=48.444, loss_att=46.138, acc=0.728, loss=46.830, backward_time=0.399, grad_norm=32.364, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.381e-04, train_time=1.567 +[gpub022:0/16] 2024-02-01 16:33:55,719 (trainer:737) INFO: 18epoch:train:9701-9800batch: iter_time=8.130e-05, forward_time=0.389, loss_ctc=53.145, loss_att=47.759, acc=0.761, loss=49.375, backward_time=0.439, grad_norm=32.567, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.098, optim0_lr0=2.380e-04, train_time=1.625 +[gpub022:0/16] 2024-02-01 16:36:20,739 (trainer:737) INFO: 
18epoch:train:9801-9900batch: iter_time=9.018e-05, forward_time=0.291, loss_ctc=52.104, loss_att=54.008, acc=0.707, loss=53.437, backward_time=0.404, grad_norm=39.162, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.380e-04, train_time=1.450 +[gpub022:0/16] 2024-02-01 16:38:55,609 (trainer:737) INFO: 18epoch:train:9901-10000batch: iter_time=8.735e-05, forward_time=0.294, loss_ctc=42.969, loss_att=42.764, acc=0.722, loss=42.826, backward_time=0.400, grad_norm=29.788, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.380e-04, train_time=1.548 +[gpub022:0/16] 2024-02-01 16:39:15,649 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub022:0/16] 2024-02-01 16:39:34,709 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 16:39:38,308 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 16:39:38,308 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub022:0/16] 2024-02-01 16:39:38,312 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 16:47:36,323 (trainer:737) INFO: 18epoch:train:10001-10100batch: iter_time=3.509, forward_time=0.392, loss_ctc=44.803, loss_att=45.612, acc=0.752, loss=45.369, backward_time=0.420, grad_norm=32.019, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.095, optim0_lr0=2.379e-04, train_time=5.207 +[gpub022:0/16] 
2024-02-01 16:50:18,080 (trainer:737) INFO: 18epoch:train:10101-10200batch: iter_time=8.502e-05, forward_time=0.295, loss_ctc=52.663, loss_att=49.926, acc=0.738, loss=50.747, backward_time=0.404, grad_norm=33.050, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.379e-04, train_time=1.617 +[gpub022:0/16] 2024-02-01 16:53:39,153 (trainer:737) INFO: 18epoch:train:10201-10300batch: iter_time=9.233e-05, forward_time=0.388, loss_ctc=42.094, loss_att=38.472, acc=0.741, loss=39.559, backward_time=0.431, grad_norm=30.429, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.101, optim0_lr0=2.378e-04, train_time=2.010 +[gpub022:0/16] 2024-02-01 16:56:09,787 (trainer:737) INFO: 18epoch:train:10301-10400batch: iter_time=9.708e-05, forward_time=0.292, loss_ctc=51.857, loss_att=49.922, acc=0.737, loss=50.503, backward_time=0.406, grad_norm=34.099, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.378e-04, train_time=1.506 +[gpub022:0/16] 2024-02-01 16:59:04,242 (trainer:737) INFO: 18epoch:train:10401-10500batch: iter_time=9.021e-05, forward_time=0.295, loss_ctc=58.126, loss_att=53.032, acc=0.741, loss=54.560, backward_time=0.405, grad_norm=47.221, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.377e-04, train_time=1.744 +[gpub022:0/16] 2024-02-01 17:01:36,554 (trainer:737) INFO: 18epoch:train:10501-10600batch: iter_time=8.200e-04, forward_time=0.396, loss_ctc=49.246, loss_att=55.295, acc=0.727, loss=53.480, backward_time=0.457, grad_norm=34.067, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.102, optim0_lr0=2.377e-04, train_time=1.523 +[gpub022:0/16] 2024-02-01 17:04:27,457 (trainer:737) INFO: 18epoch:train:10601-10700batch: iter_time=8.994e-05, forward_time=0.290, loss_ctc=56.646, loss_att=49.934, acc=0.741, loss=51.948, backward_time=0.403, grad_norm=34.648, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.376e-04, train_time=1.709 +[gpub022:0/16] 2024-02-01 17:05:08,353 
(trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-01 17:07:18,675 (trainer:737) INFO: 18epoch:train:10701-10800batch: iter_time=8.724e-05, forward_time=0.302, loss_ctc=47.419, loss_att=47.436, acc=0.746, loss=47.431, backward_time=0.402, grad_norm=31.369, clip=100.000, loss_scale=8.457e+32, optim_step_time=0.092, optim0_lr0=2.376e-04, train_time=1.711 +[gpub022:0/16] 2024-02-01 17:10:05,159 (trainer:737) INFO: 18epoch:train:10801-10900batch: iter_time=8.553e-05, forward_time=0.348, loss_ctc=55.973, loss_att=51.687, acc=0.741, loss=52.973, backward_time=0.466, grad_norm=37.382, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.099, optim0_lr0=2.375e-04, train_time=1.665 +[gpub022:0/16] 2024-02-01 17:12:33,571 (trainer:737) INFO: 18epoch:train:10901-11000batch: iter_time=8.895e-05, forward_time=0.290, loss_ctc=48.347, loss_att=47.830, acc=0.753, loss=47.985, backward_time=0.403, grad_norm=31.948, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.092, optim0_lr0=2.375e-04, train_time=1.484 +[gpub022:0/16] 2024-02-01 17:15:22,368 (trainer:737) INFO: 18epoch:train:11001-11100batch: iter_time=9.771e-05, forward_time=0.297, loss_ctc=53.910, loss_att=54.308, acc=0.739, loss=54.188, backward_time=0.405, grad_norm=36.260, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.093, optim0_lr0=2.375e-04, train_time=1.688 +[gpub022:0/16] 2024-02-01 17:18:17,246 (trainer:737) INFO: 18epoch:train:11101-11200batch: iter_time=9.242e-05, forward_time=0.365, loss_ctc=45.150, loss_att=43.469, acc=0.730, loss=43.973, backward_time=0.456, grad_norm=33.670, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.099, optim0_lr0=2.374e-04, train_time=1.749 +[gpub022:0/16] 2024-02-01 17:19:52,973 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub022:0/16] 2024-02-01 17:20:11,669 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 17:20:15,266 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 17:20:15,267 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub022:0/16] 2024-02-01 17:20:15,270 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 17:26:01,065 (trainer:737) INFO: 18epoch:train:11201-11300batch: iter_time=2.997, forward_time=0.411, loss_ctc=44.062, loss_att=47.627, acc=0.726, loss=46.557, backward_time=0.425, grad_norm=31.853, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.095, optim0_lr0=2.374e-04, train_time=4.638 +[gpub022:0/16] 2024-02-01 17:28:41,250 (trainer:737) INFO: 18epoch:train:11301-11400batch: iter_time=7.748e-05, forward_time=0.296, loss_ctc=50.668, loss_att=46.828, acc=0.742, loss=47.980, backward_time=0.405, grad_norm=31.055, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.093, optim0_lr0=2.373e-04, train_time=1.602 +[gpub022:0/16] 2024-02-01 17:31:26,473 (trainer:737) INFO: 18epoch:train:11401-11500batch: iter_time=2.073e-04, forward_time=0.360, loss_ctc=48.065, loss_att=44.662, acc=0.737, loss=45.683, backward_time=0.447, grad_norm=32.719, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.096, optim0_lr0=2.373e-04, train_time=1.652 +[gpub022:0/16] 2024-02-01 17:33:40,369 (trainer:737) 
INFO: 18epoch:train:11501-11600batch: iter_time=9.395e-05, forward_time=0.286, loss_ctc=41.986, loss_att=37.949, acc=0.729, loss=39.160, backward_time=0.400, grad_norm=31.758, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.092, optim0_lr0=2.372e-04, train_time=1.339 +[gpub022:0/16] 2024-02-01 17:36:07,402 (trainer:737) INFO: 18epoch:train:11601-11700batch: iter_time=9.050e-05, forward_time=0.303, loss_ctc=54.024, loss_att=54.948, acc=0.723, loss=54.671, backward_time=0.407, grad_norm=43.908, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.092, optim0_lr0=2.372e-04, train_time=1.470 +[gpub022:0/16] 2024-02-01 17:38:43,785 (trainer:737) INFO: 18epoch:train:11701-11800batch: iter_time=9.383e-05, forward_time=0.361, loss_ctc=55.160, loss_att=50.888, acc=0.739, loss=52.170, backward_time=0.451, grad_norm=35.654, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.102, optim0_lr0=2.371e-04, train_time=1.563 +[gpub022:0/16] 2024-02-01 17:41:19,109 (trainer:737) INFO: 18epoch:train:11801-11900batch: iter_time=8.492e-05, forward_time=0.290, loss_ctc=49.900, loss_att=48.914, acc=0.734, loss=49.210, backward_time=0.403, grad_norm=33.129, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.092, optim0_lr0=2.371e-04, train_time=1.553 +[gpub022:0/16] 2024-02-01 17:43:47,895 (trainer:737) INFO: 18epoch:train:11901-12000batch: iter_time=8.958e-05, forward_time=0.291, loss_ctc=51.797, loss_att=46.558, acc=0.732, loss=48.129, backward_time=0.409, grad_norm=34.079, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.092, optim0_lr0=2.371e-04, train_time=1.488 +[gpub022:0/16] 2024-02-01 17:46:09,240 (trainer:737) INFO: 18epoch:train:12001-12100batch: iter_time=8.934e-05, forward_time=0.367, loss_ctc=52.887, loss_att=52.346, acc=0.737, loss=52.508, backward_time=0.430, grad_norm=38.132, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.098, optim0_lr0=2.370e-04, train_time=1.412 +[gpub022:0/16] 2024-02-01 17:48:29,984 (trainer:737) INFO: 
18epoch:train:12101-12200batch: iter_time=8.550e-05, forward_time=0.288, loss_ctc=48.318, loss_att=46.400, acc=0.728, loss=46.975, backward_time=0.401, grad_norm=33.381, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.092, optim0_lr0=2.370e-04, train_time=1.408 +[gpub022:0/16] 2024-02-01 17:51:14,467 (trainer:737) INFO: 18epoch:train:12201-12300batch: iter_time=9.433e-05, forward_time=0.291, loss_ctc=53.270, loss_att=48.238, acc=0.762, loss=49.748, backward_time=0.414, grad_norm=33.655, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.093, optim0_lr0=2.369e-04, train_time=1.645 +[gpub022:0/16] 2024-02-01 17:53:37,896 (trainer:737) INFO: 18epoch:train:12301-12400batch: iter_time=0.001, forward_time=0.363, loss_ctc=51.651, loss_att=53.590, acc=0.708, loss=53.008, backward_time=0.435, grad_norm=38.387, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.102, optim0_lr0=2.369e-04, train_time=1.433 +[gpub022:0/16] 2024-02-01 17:55:48,481 (trainer:737) INFO: 18epoch:train:12401-12500batch: iter_time=9.029e-05, forward_time=0.289, loss_ctc=43.055, loss_att=42.817, acc=0.719, loss=42.888, backward_time=0.402, grad_norm=30.092, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.092, optim0_lr0=2.368e-04, train_time=1.307 +[gpub022:0/16] 2024-02-01 17:56:08,508 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub022:0/16] 2024-02-01 17:56:27,788 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 17:56:31,325 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 17:56:31,325 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub022:0/16] 2024-02-01 17:56:31,328 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 18:04:10,788 (trainer:737) INFO: 18epoch:train:12501-12600batch: iter_time=3.459, forward_time=0.299, loss_ctc=44.847, loss_att=45.249, acc=0.752, loss=45.128, backward_time=0.411, grad_norm=31.535, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.093, optim0_lr0=2.368e-04, train_time=5.023 +[gpub022:0/16] 2024-02-01 18:06:42,416 (trainer:737) INFO: 18epoch:train:12601-12700batch: iter_time=7.830e-05, forward_time=0.396, loss_ctc=52.625, loss_att=49.797, acc=0.738, loss=50.646, backward_time=0.426, grad_norm=32.277, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.110, optim0_lr0=2.367e-04, train_time=1.516 +[gpub022:0/16] 2024-02-01 18:09:33,281 (trainer:737) INFO: 18epoch:train:12701-12800batch: iter_time=8.760e-05, forward_time=0.287, loss_ctc=42.113, loss_att=37.960, acc=0.743, loss=39.206, backward_time=0.397, grad_norm=29.981, clip=100.000, loss_scale=1.097e+33, optim_step_time=0.092, optim0_lr0=2.367e-04, train_time=1.707 +[gpub022:0/16] 2024-02-01 18:11:52,411 (trainer:737) 
INFO: 18epoch:train:12801-12900batch: iter_time=8.633e-05, forward_time=0.302, loss_ctc=50.662, loss_att=49.346, acc=0.738, loss=49.741, backward_time=0.409, grad_norm=32.975, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.367e-04, train_time=1.392 +[gpub022:0/16] 2024-02-01 18:14:43,105 (trainer:737) INFO: 18epoch:train:12901-13000batch: iter_time=8.366e-05, forward_time=0.348, loss_ctc=56.010, loss_att=50.338, acc=0.743, loss=52.039, backward_time=0.432, grad_norm=42.572, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.102, optim0_lr0=2.366e-04, train_time=1.707 +[gpub022:0/16] 2024-02-01 18:17:13,200 (trainer:737) INFO: 18epoch:train:13001-13100batch: iter_time=9.542e-05, forward_time=0.317, loss_ctc=49.081, loss_att=54.965, acc=0.728, loss=53.200, backward_time=0.407, grad_norm=34.555, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.366e-04, train_time=1.501 +[gpub022:0/16] 2024-02-01 18:19:31,104 (trainer:737) INFO: 18epoch:train:13101-13200batch: iter_time=8.557e-05, forward_time=0.291, loss_ctc=55.471, loss_att=48.935, acc=0.744, loss=50.896, backward_time=0.404, grad_norm=33.485, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.365e-04, train_time=1.377 +[gpub022:0/16] 2024-02-01 18:22:07,062 (trainer:737) INFO: 18epoch:train:13201-13300batch: iter_time=8.892e-05, forward_time=0.300, loss_ctc=47.113, loss_att=46.775, acc=0.746, loss=46.876, backward_time=0.417, grad_norm=30.301, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.365e-04, train_time=1.561 +[gpub022:0/16] 2024-02-01 18:24:42,780 (trainer:737) INFO: 18epoch:train:13301-13400batch: iter_time=8.917e-05, forward_time=0.364, loss_ctc=54.952, loss_att=51.132, acc=0.744, loss=52.278, backward_time=0.428, grad_norm=36.411, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.102, optim0_lr0=2.364e-04, train_time=1.557 +[gpub022:0/16] 2024-02-01 18:27:25,253 (trainer:737) INFO: 
18epoch:train:13401-13500batch: iter_time=8.739e-05, forward_time=0.290, loss_ctc=47.891, loss_att=47.504, acc=0.754, loss=47.620, backward_time=0.402, grad_norm=31.333, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.364e-04, train_time=1.625 +[gpub022:0/16] 2024-02-01 18:29:36,017 (trainer:737) INFO: 18epoch:train:13501-13600batch: iter_time=8.808e-05, forward_time=0.293, loss_ctc=53.404, loss_att=54.572, acc=0.737, loss=54.222, backward_time=0.406, grad_norm=35.713, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.364e-04, train_time=1.307 +[gpub022:0/16] 2024-02-01 18:32:07,824 (trainer:737) INFO: 18epoch:train:13601-13700batch: iter_time=8.543e-05, forward_time=0.306, loss_ctc=44.550, loss_att=43.870, acc=0.728, loss=44.074, backward_time=0.417, grad_norm=32.670, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.094, optim0_lr0=2.363e-04, train_time=1.519 +[gpub022:0/16] 2024-02-01 18:34:08,496 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub022:0/16] 2024-02-01 18:34:28,194 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 18:34:32,174 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 18:34:32,174 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub022:0/16] 2024-02-01 18:34:32,178 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 18:41:13,493 (trainer:737) INFO: 18epoch:train:13701-13800batch: iter_time=3.741, forward_time=0.395, loss_ctc=43.634, loss_att=48.315, acc=0.723, loss=46.911, backward_time=0.435, grad_norm=33.939, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.095, optim0_lr0=2.363e-04, train_time=5.456 +[gpub022:0/16] 2024-02-01 18:43:38,832 (trainer:737) INFO: 18epoch:train:13801-13900batch: iter_time=7.520e-05, forward_time=0.291, loss_ctc=50.819, loss_att=47.260, acc=0.740, loss=48.328, backward_time=0.403, grad_norm=32.511, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.362e-04, train_time=1.453 +[gpub022:0/16] 2024-02-01 18:46:27,690 (trainer:737) INFO: 18epoch:train:13901-14000batch: iter_time=7.743e-05, forward_time=0.368, loss_ctc=47.187, loss_att=44.558, acc=0.738, loss=45.347, backward_time=0.501, grad_norm=31.115, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.100, optim0_lr0=2.362e-04, train_time=1.687 +[gpub022:0/16] 2024-02-01 18:49:22,365 (trainer:737) 
INFO: 18epoch:train:14001-14100batch: iter_time=7.943e-05, forward_time=0.286, loss_ctc=41.527, loss_att=38.570, acc=0.726, loss=39.457, backward_time=0.397, grad_norm=32.121, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.361e-04, train_time=1.747 +[gpub022:0/16] 2024-02-01 18:51:50,412 (trainer:737) INFO: 18epoch:train:14101-14200batch: iter_time=7.848e-05, forward_time=0.355, loss_ctc=55.336, loss_att=54.786, acc=0.726, loss=54.951, backward_time=0.479, grad_norm=46.863, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.098, optim0_lr0=2.361e-04, train_time=1.481 +[gpub022:0/16] 2024-02-01 18:54:30,047 (trainer:737) INFO: 18epoch:train:14201-14300batch: iter_time=8.250e-05, forward_time=0.308, loss_ctc=54.607, loss_att=50.089, acc=0.740, loss=51.444, backward_time=0.402, grad_norm=34.247, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.092, optim0_lr0=2.360e-04, train_time=1.595 +[gpub022:0/16] 2024-02-01 18:57:22,137 (trainer:737) INFO: 18epoch:train:14301-14400batch: iter_time=8.208e-05, forward_time=0.289, loss_ctc=49.652, loss_att=48.952, acc=0.733, loss=49.162, backward_time=0.403, grad_norm=34.929, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.360e-04, train_time=1.721 +[gpub022:0/16] 2024-02-01 19:01:40,179 (trainer:737) INFO: 18epoch:train:14401-14500batch: iter_time=0.132, forward_time=0.867, loss_ctc=50.852, loss_att=46.516, acc=0.731, loss=47.816, backward_time=0.487, grad_norm=33.607, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.100, optim0_lr0=2.360e-04, train_time=2.580 +[gpub022:0/16] 2024-02-01 19:04:24,823 (trainer:737) INFO: 18epoch:train:14501-14600batch: iter_time=7.594e-05, forward_time=0.307, loss_ctc=52.560, loss_att=51.958, acc=0.737, loss=52.138, backward_time=0.404, grad_norm=36.894, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.101, optim0_lr0=2.359e-04, train_time=1.646 +[gpub022:0/16] 2024-02-01 19:06:51,155 (trainer:737) INFO: 
18epoch:train:14601-14700batch: iter_time=7.554e-05, forward_time=0.288, loss_ctc=48.345, loss_att=46.124, acc=0.728, loss=46.790, backward_time=0.401, grad_norm=33.522, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.359e-04, train_time=1.464 +[gpub022:0/16] 2024-02-01 19:11:16,255 (trainer:737) INFO: 18epoch:train:14701-14800batch: iter_time=7.576e-05, forward_time=0.875, loss_ctc=52.916, loss_att=47.927, acc=0.761, loss=49.424, backward_time=0.536, grad_norm=33.195, clip=100.000, loss_scale=2.194e+33, optim_step_time=0.150, optim0_lr0=2.358e-04, train_time=2.650 +[gpub022:0/16] 2024-02-01 19:14:41,388 (trainer:737) INFO: 18epoch:train:14801-14900batch: iter_time=7.599e-05, forward_time=0.353, loss_ctc=51.316, loss_att=53.594, acc=0.709, loss=52.911, backward_time=0.409, grad_norm=40.573, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.358e-04, train_time=2.050 +[gpub022:0/16] 2024-02-01 19:16:53,337 (trainer:737) INFO: 18epoch:train:14901-15000batch: iter_time=8.034e-05, forward_time=0.288, loss_ctc=43.031, loss_att=42.695, acc=0.722, loss=42.796, backward_time=0.401, grad_norm=31.140, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.357e-04, train_time=1.321 +[gpub022:0/16] 2024-02-01 19:56:07,228 (trainer:343) INFO: 18epoch results: [train] iter_time=0.291, forward_time=0.381, loss_ctc=50.688, loss_att=48.608, acc=0.733, loss=49.232, backward_time=0.439, grad_norm=34.656, clip=100.000, loss_scale=4.107e+33, optim_step_time=0.098, optim0_lr0=2.391e-04, train_time=1.996, time=8 hours, 19 minutes and 22.1 seconds, total_count=300000, gpu_max_cached_mem_GB=41.891, [valid] loss_ctc=41.877, cer_ctc=0.218, loss_att=42.636, acc=0.657, cer=0.320, wer=0.994, loss=42.408, time=38 minutes and 48.94 seconds, total_count=93420, gpu_max_cached_mem_GB=41.891 +[gpub022:0/16] 2024-02-01 19:56:26,121 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub022:0/16] 2024-02-01 
19:56:26,166 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/13epoch.pth +[gpub022:0/16] 2024-02-01 19:56:26,167 (trainer:272) INFO: 19/45epoch started. Estimated time to finish: 1 week, 3 days and 2 hours +[gpub022:0/16] 2024-02-01 19:56:26,176 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub022:0/16] 2024-02-01 19:56:43,827 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 19:56:47,152 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 19:56:47,152 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub022:0/16] 2024-02-01 19:56:47,155 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 20:03:46,393 (trainer:737) INFO: 19epoch:train:1-100batch: iter_time=2.721, forward_time=0.494, loss_ctc=46.786, loss_att=43.911, acc=0.752, loss=44.774, backward_time=0.456, grad_norm=31.442, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=2.357e-04, train_time=4.401 +[gpub022:0/16] 2024-02-01 20:06:05,161 (trainer:737) INFO: 19epoch:train:101-200batch: iter_time=9.416e-05, forward_time=0.292, loss_ctc=65.900, loss_att=65.884, acc=0.720, loss=65.889, backward_time=0.407, grad_norm=44.090, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.356e-04, 
train_time=1.389 +[gpub022:0/16] 2024-02-01 20:09:00,767 (trainer:737) INFO: 19epoch:train:201-300batch: iter_time=4.911e-04, forward_time=0.434, loss_ctc=52.590, loss_att=50.960, acc=0.736, loss=51.449, backward_time=0.427, grad_norm=32.975, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=2.356e-04, train_time=1.755 +[gpub022:0/16] 2024-02-01 20:11:29,168 (trainer:737) INFO: 19epoch:train:301-400batch: iter_time=1.045e-04, forward_time=0.289, loss_ctc=47.686, loss_att=41.714, acc=0.754, loss=43.506, backward_time=0.403, grad_norm=31.484, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.356e-04, train_time=1.485 +[gpub022:0/16] 2024-02-01 20:14:08,644 (trainer:737) INFO: 19epoch:train:401-500batch: iter_time=4.683e-04, forward_time=0.354, loss_ctc=58.247, loss_att=58.472, acc=0.712, loss=58.404, backward_time=0.444, grad_norm=42.270, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.110, optim0_lr0=2.355e-04, train_time=1.595 +[gpub022:0/16] 2024-02-01 20:16:35,330 (trainer:737) INFO: 19epoch:train:501-600batch: iter_time=1.086e-04, forward_time=0.334, loss_ctc=57.944, loss_att=50.588, acc=0.729, loss=52.795, backward_time=0.420, grad_norm=39.129, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.355e-04, train_time=1.465 +[gpub022:0/16] 2024-02-01 20:19:11,470 (trainer:737) INFO: 19epoch:train:601-700batch: iter_time=2.057e-04, forward_time=0.371, loss_ctc=51.805, loss_att=49.064, acc=0.744, loss=49.886, backward_time=0.454, grad_norm=33.692, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=2.354e-04, train_time=1.563 +[gpub022:0/16] 2024-02-01 20:21:55,117 (trainer:737) INFO: 19epoch:train:701-800batch: iter_time=3.612e-04, forward_time=0.425, loss_ctc=45.807, loss_att=41.279, acc=0.753, loss=42.637, backward_time=0.439, grad_norm=31.229, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=2.354e-04, train_time=1.636 +[gpub022:0/16] 2024-02-01 
20:24:42,656 (trainer:737) INFO: 19epoch:train:801-900batch: iter_time=4.417e-04, forward_time=0.405, loss_ctc=63.784, loss_att=56.138, acc=0.720, loss=58.432, backward_time=0.426, grad_norm=55.834, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=2.353e-04, train_time=1.676 +[gpub022:0/16] 2024-02-01 20:27:07,020 (trainer:737) INFO: 19epoch:train:901-1000batch: iter_time=9.694e-05, forward_time=0.301, loss_ctc=53.685, loss_att=49.117, acc=0.754, loss=50.488, backward_time=0.410, grad_norm=34.720, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.353e-04, train_time=1.443 +[gpub022:0/16] 2024-02-01 20:29:47,082 (trainer:737) INFO: 19epoch:train:1001-1100batch: iter_time=1.626e-04, forward_time=0.365, loss_ctc=42.949, loss_att=43.710, acc=0.739, loss=43.482, backward_time=0.437, grad_norm=29.752, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.353e-04, train_time=1.601 +[gpub022:0/16] 2024-02-01 20:32:36,002 (trainer:737) INFO: 19epoch:train:1101-1200batch: iter_time=2.036e-04, forward_time=0.389, loss_ctc=44.059, loss_att=43.595, acc=0.741, loss=43.734, backward_time=0.434, grad_norm=33.321, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=2.352e-04, train_time=1.688 +[gpub022:0/16] 2024-02-01 20:34:05,948 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub022:0/16] 2024-02-01 20:34:24,338 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 20:34:27,827 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 20:34:27,827 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub022:0/16] 2024-02-01 20:34:27,859 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 20:40:07,415 (trainer:737) INFO: 19epoch:train:1201-1300batch: iter_time=2.961, forward_time=0.290, loss_ctc=54.447, loss_att=49.153, acc=0.740, loss=50.741, backward_time=0.405, grad_norm=37.743, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.352e-04, train_time=4.515 +[gpub022:0/16] 2024-02-01 20:42:33,333 (trainer:737) INFO: 19epoch:train:1301-1400batch: iter_time=9.638e-05, forward_time=0.294, loss_ctc=60.769, loss_att=56.318, acc=0.742, loss=57.653, backward_time=0.413, grad_norm=38.263, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.351e-04, train_time=1.459 +[gpub022:0/16] 2024-02-01 20:45:09,988 (trainer:737) INFO: 19epoch:train:1401-1500batch: iter_time=9.719e-05, forward_time=0.356, loss_ctc=47.317, loss_att=53.299, acc=0.738, loss=51.504, backward_time=0.465, grad_norm=32.919, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.351e-04, train_time=1.566 +[gpub022:0/16] 2024-02-01 20:47:39,543 (trainer:737) INFO: 
19epoch:train:1501-1600batch: iter_time=9.044e-05, forward_time=0.315, loss_ctc=52.142, loss_att=44.481, acc=0.747, loss=46.779, backward_time=0.403, grad_norm=31.127, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.350e-04, train_time=1.496 +[gpub022:0/16] 2024-02-01 20:50:12,144 (trainer:737) INFO: 19epoch:train:1601-1700batch: iter_time=1.027e-04, forward_time=0.290, loss_ctc=51.684, loss_att=53.714, acc=0.726, loss=53.105, backward_time=0.403, grad_norm=35.665, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.350e-04, train_time=1.526 +[gpub022:0/16] 2024-02-01 20:52:49,340 (trainer:737) INFO: 19epoch:train:1701-1800batch: iter_time=1.002e-04, forward_time=0.390, loss_ctc=55.045, loss_att=48.155, acc=0.727, loss=50.222, backward_time=0.452, grad_norm=44.268, clip=100.000, loss_scale=4.387e+33, optim_step_time=0.101, optim0_lr0=2.350e-04, train_time=1.572 +[gpub022:0/16] 2024-02-01 20:55:10,680 (trainer:737) INFO: 19epoch:train:1801-1900batch: iter_time=9.691e-05, forward_time=0.291, loss_ctc=52.886, loss_att=50.436, acc=0.750, loss=51.171, backward_time=0.409, grad_norm=33.992, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.349e-04, train_time=1.412 +[gpub022:0/16] 2024-02-01 20:57:43,541 (trainer:737) INFO: 19epoch:train:1901-2000batch: iter_time=9.241e-05, forward_time=0.290, loss_ctc=45.936, loss_att=41.773, acc=0.747, loss=43.022, backward_time=0.401, grad_norm=30.972, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.349e-04, train_time=1.530 +[gpub022:0/16] 2024-02-01 21:00:21,323 (trainer:737) INFO: 19epoch:train:2001-2100batch: iter_time=9.287e-05, forward_time=0.335, loss_ctc=53.386, loss_att=50.984, acc=0.733, loss=51.705, backward_time=0.424, grad_norm=37.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.348e-04, train_time=1.578 +[gpub022:0/16] 2024-02-01 21:02:58,370 (trainer:737) INFO: 19epoch:train:2101-2200batch: 
iter_time=9.513e-05, forward_time=0.362, loss_ctc=61.991, loss_att=56.770, acc=0.737, loss=58.336, backward_time=0.420, grad_norm=56.785, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.348e-04, train_time=1.570 +[gpub022:0/16] 2024-02-01 21:05:27,582 (trainer:737) INFO: 19epoch:train:2201-2300batch: iter_time=1.002e-04, forward_time=0.290, loss_ctc=43.859, loss_att=44.776, acc=0.760, loss=44.501, backward_time=0.404, grad_norm=29.180, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.347e-04, train_time=1.491 +[gpub022:0/16] 2024-02-01 21:07:53,884 (trainer:737) INFO: 19epoch:train:2301-2400batch: iter_time=9.948e-05, forward_time=0.288, loss_ctc=43.180, loss_att=43.506, acc=0.741, loss=43.408, backward_time=0.401, grad_norm=30.794, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.347e-04, train_time=1.464 +[gpub022:0/16] 2024-02-01 21:10:33,156 (trainer:737) INFO: 19epoch:train:2401-2500batch: iter_time=0.001, forward_time=0.335, loss_ctc=51.150, loss_att=46.117, acc=0.737, loss=47.627, backward_time=0.451, grad_norm=39.278, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=2.347e-04, train_time=1.592 +[gpub022:0/16] 2024-02-01 21:10:53,180 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub022:0/16] 2024-02-01 21:11:12,071 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 21:11:15,520 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 21:11:15,520 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub022:0/16] 2024-02-01 21:11:15,542 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 21:17:57,431 (trainer:737) INFO: 19epoch:train:2501-2600batch: iter_time=3.010, forward_time=0.316, loss_ctc=45.940, loss_att=44.335, acc=0.746, loss=44.816, backward_time=0.407, grad_norm=33.508, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.346e-04, train_time=4.443 +[gpub022:0/16] 2024-02-01 21:20:31,401 (trainer:737) INFO: 19epoch:train:2601-2700batch: iter_time=1.026e-04, forward_time=0.293, loss_ctc=60.525, loss_att=63.997, acc=0.722, loss=62.955, backward_time=0.408, grad_norm=43.233, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.346e-04, train_time=1.539 +[gpub022:0/16] 2024-02-01 21:23:04,933 (trainer:737) INFO: 19epoch:train:2701-2800batch: iter_time=9.610e-05, forward_time=0.368, loss_ctc=50.919, loss_att=51.661, acc=0.727, loss=51.438, backward_time=0.428, grad_norm=34.864, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.345e-04, train_time=1.534 +[gpub022:0/16] 2024-02-01 21:25:31,864 (trainer:737) INFO: 
19epoch:train:2801-2900batch: iter_time=9.023e-05, forward_time=0.312, loss_ctc=46.596, loss_att=41.668, acc=0.742, loss=43.147, backward_time=0.419, grad_norm=31.501, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.345e-04, train_time=1.470 +[gpub022:0/16] 2024-02-01 21:28:02,571 (trainer:737) INFO: 19epoch:train:2901-3000batch: iter_time=9.741e-05, forward_time=0.290, loss_ctc=56.777, loss_att=58.565, acc=0.709, loss=58.028, backward_time=0.404, grad_norm=43.355, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.344e-04, train_time=1.507 +[gpub022:0/16] 2024-02-01 21:30:22,608 (trainer:737) INFO: 19epoch:train:3001-3100batch: iter_time=9.278e-05, forward_time=0.294, loss_ctc=50.950, loss_att=47.926, acc=0.729, loss=48.833, backward_time=0.404, grad_norm=40.471, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.344e-04, train_time=1.400 +[gpub022:0/16] 2024-02-01 21:33:03,437 (trainer:737) INFO: 19epoch:train:3101-3200batch: iter_time=9.374e-05, forward_time=0.355, loss_ctc=50.702, loss_att=45.521, acc=0.746, loss=47.076, backward_time=0.467, grad_norm=34.598, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.344e-04, train_time=1.608 +[gpub022:0/16] 2024-02-01 21:35:24,090 (trainer:737) INFO: 19epoch:train:3201-3300batch: iter_time=9.809e-05, forward_time=0.288, loss_ctc=45.013, loss_att=40.989, acc=0.747, loss=42.196, backward_time=0.402, grad_norm=30.475, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.343e-04, train_time=1.406 +[gpub022:0/16] 2024-02-01 21:37:51,398 (trainer:737) INFO: 19epoch:train:3301-3400batch: iter_time=9.760e-05, forward_time=0.289, loss_ctc=62.658, loss_att=57.139, acc=0.720, loss=58.795, backward_time=0.404, grad_norm=53.437, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.343e-04, train_time=1.473 +[gpub022:0/16] 2024-02-01 21:40:17,034 (trainer:737) INFO: 19epoch:train:3401-3500batch: 
iter_time=1.039e-04, forward_time=0.292, loss_ctc=52.640, loss_att=48.100, acc=0.745, loss=49.462, backward_time=0.411, grad_norm=36.515, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.342e-04, train_time=1.456 +[gpub022:0/16] 2024-02-01 21:42:51,295 (trainer:737) INFO: 19epoch:train:3501-3600batch: iter_time=9.250e-05, forward_time=0.372, loss_ctc=42.195, loss_att=43.122, acc=0.741, loss=42.844, backward_time=0.427, grad_norm=30.145, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.342e-04, train_time=1.542 +[gpub022:0/16] 2024-02-01 21:45:22,552 (trainer:737) INFO: 19epoch:train:3601-3700batch: iter_time=9.497e-05, forward_time=0.291, loss_ctc=42.798, loss_att=43.420, acc=0.738, loss=43.233, backward_time=0.401, grad_norm=34.382, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.341e-04, train_time=1.513 +[gpub022:0/16] 2024-02-01 21:46:51,284 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub022:0/16] 2024-02-01 21:47:10,093 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 21:47:13,604 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 21:47:13,604 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub022:0/16] 2024-02-01 21:47:13,626 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 21:53:11,660 
(trainer:737) INFO: 19epoch:train:3701-3800batch: iter_time=3.286, forward_time=0.333, loss_ctc=52.791, loss_att=48.799, acc=0.736, loss=49.997, backward_time=0.411, grad_norm=37.475, clip=100.000, loss_scale=8.775e+33, optim_step_time=0.095, optim0_lr0=2.341e-04, train_time=4.691 +[gpub022:0/16] 2024-02-01 21:55:29,589 (trainer:737) INFO: 19epoch:train:3801-3900batch: iter_time=8.091e-05, forward_time=0.296, loss_ctc=58.522, loss_att=57.162, acc=0.727, loss=57.570, backward_time=0.406, grad_norm=39.116, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.341e-04, train_time=1.379 +[gpub022:0/16] 2024-02-01 21:57:53,497 (trainer:737) INFO: 19epoch:train:3901-4000batch: iter_time=7.796e-05, forward_time=0.290, loss_ctc=46.267, loss_att=52.513, acc=0.737, loss=50.639, backward_time=0.402, grad_norm=31.703, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.340e-04, train_time=1.439 +[gpub022:0/16] 2024-02-01 21:58:41,129 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-01 22:00:34,409 (trainer:737) INFO: 19epoch:train:4001-4100batch: iter_time=9.150e-05, forward_time=0.343, loss_ctc=51.852, loss_att=44.295, acc=0.739, loss=46.562, backward_time=0.461, grad_norm=32.395, clip=100.000, loss_scale=6.923e+33, optim_step_time=0.100, optim0_lr0=2.340e-04, train_time=1.609 +[gpub022:0/16] 2024-02-01 22:02:52,997 (trainer:737) INFO: 19epoch:train:4101-4200batch: iter_time=9.467e-05, forward_time=0.294, loss_ctc=51.120, loss_att=53.508, acc=0.716, loss=52.792, backward_time=0.406, grad_norm=37.570, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.339e-04, train_time=1.386 +[gpub022:0/16] 2024-02-01 22:05:15,316 (trainer:737) INFO: 19epoch:train:4201-4300batch: iter_time=9.623e-05, forward_time=0.291, loss_ctc=52.906, loss_att=46.666, acc=0.734, loss=48.538, backward_time=0.403, grad_norm=43.770, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.339e-04, train_time=1.422 +[gpub022:0/16] 2024-02-01 22:07:50,535 (trainer:737) INFO: 19epoch:train:4301-4400batch: iter_time=8.977e-05, forward_time=0.290, loss_ctc=52.185, loss_att=48.035, acc=0.744, loss=49.280, backward_time=0.404, grad_norm=35.645, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.338e-04, train_time=1.553 +[gpub022:0/16] 2024-02-01 22:10:40,251 (trainer:737) INFO: 19epoch:train:4401-4500batch: iter_time=9.651e-05, forward_time=0.361, loss_ctc=45.661, loss_att=40.253, acc=0.749, loss=41.875, backward_time=0.455, grad_norm=30.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.338e-04, train_time=1.697 +[gpub022:0/16] 2024-02-01 22:12:58,722 (trainer:737) INFO: 19epoch:train:4501-4600batch: iter_time=9.328e-05, forward_time=0.294, loss_ctc=52.162, loss_att=49.372, acc=0.728, loss=50.209, backward_time=0.404, grad_norm=37.028, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.338e-04, train_time=1.384 +[gpub022:0/16] 2024-02-01 22:15:29,467 
(trainer:737) INFO: 19epoch:train:4601-4700batch: iter_time=9.178e-05, forward_time=0.295, loss_ctc=60.596, loss_att=56.344, acc=0.729, loss=57.619, backward_time=0.404, grad_norm=54.461, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.337e-04, train_time=1.507 +[gpub022:0/16] 2024-02-01 22:18:06,516 (trainer:737) INFO: 19epoch:train:4701-4800batch: iter_time=8.598e-05, forward_time=0.376, loss_ctc=43.263, loss_att=44.589, acc=0.757, loss=44.191, backward_time=0.444, grad_norm=28.571, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.337e-04, train_time=1.571 +[gpub022:0/16] 2024-02-01 22:20:34,500 (trainer:737) INFO: 19epoch:train:4801-4900batch: iter_time=8.217e-05, forward_time=0.287, loss_ctc=42.374, loss_att=42.710, acc=0.740, loss=42.609, backward_time=0.400, grad_norm=30.751, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.336e-04, train_time=1.480 +[gpub022:0/16] 2024-02-01 22:22:52,004 (trainer:737) INFO: 19epoch:train:4901-5000batch: iter_time=8.276e-05, forward_time=0.293, loss_ctc=50.103, loss_att=45.866, acc=0.730, loss=47.137, backward_time=0.403, grad_norm=37.620, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.336e-04, train_time=1.375 +[gpub022:0/16] 2024-02-01 22:23:12,308 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub022:0/16] 2024-02-01 22:23:31,028 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 22:23:34,588 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 22:23:34,588 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub022:0/16] 2024-02-01 22:23:34,591 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 22:30:50,139 (trainer:737) INFO: 19epoch:train:5001-5100batch: iter_time=3.295, forward_time=0.368, loss_ctc=45.723, loss_att=41.211, acc=0.756, loss=42.565, backward_time=0.415, grad_norm=30.713, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.335e-04, train_time=4.781 +[gpub022:0/16] 2024-02-01 22:33:22,026 (trainer:737) INFO: 19epoch:train:5101-5200batch: iter_time=9.378e-05, forward_time=0.292, loss_ctc=60.508, loss_att=63.294, acc=0.723, loss=62.458, backward_time=0.406, grad_norm=41.492, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.335e-04, train_time=1.519 +[gpub022:0/16] 2024-02-01 22:36:05,097 (trainer:737) INFO: 19epoch:train:5201-5300batch: iter_time=9.640e-05, forward_time=0.295, loss_ctc=50.553, loss_att=49.291, acc=0.735, loss=49.669, backward_time=0.402, grad_norm=31.867, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.335e-04, train_time=1.630 +[gpub022:0/16] 2024-02-01 22:38:36,726 (trainer:737) 
INFO: 19epoch:train:5301-5400batch: iter_time=1.026e-04, forward_time=0.370, loss_ctc=46.196, loss_att=40.075, acc=0.748, loss=41.911, backward_time=0.436, grad_norm=30.043, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=2.334e-04, train_time=1.517 +[gpub022:0/16] 2024-02-01 22:41:01,289 (trainer:737) INFO: 19epoch:train:5401-5500batch: iter_time=9.306e-05, forward_time=0.321, loss_ctc=55.788, loss_att=56.051, acc=0.716, loss=55.972, backward_time=0.403, grad_norm=39.056, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.334e-04, train_time=1.446 +[gpub022:0/16] 2024-02-01 22:43:30,865 (trainer:737) INFO: 19epoch:train:5501-5600batch: iter_time=1.071e-04, forward_time=0.289, loss_ctc=50.245, loss_att=47.519, acc=0.732, loss=48.337, backward_time=0.404, grad_norm=39.164, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.333e-04, train_time=1.495 +[gpub022:0/16] 2024-02-01 22:46:12,985 (trainer:737) INFO: 19epoch:train:5601-5700batch: iter_time=1.025e-04, forward_time=0.307, loss_ctc=50.620, loss_att=45.268, acc=0.748, loss=46.873, backward_time=0.427, grad_norm=100.497, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.333e-04, train_time=1.621 +[gpub022:0/16] 2024-02-01 22:48:52,585 (trainer:737) INFO: 19epoch:train:5701-5800batch: iter_time=9.555e-05, forward_time=0.393, loss_ctc=44.820, loss_att=41.090, acc=0.748, loss=42.209, backward_time=0.438, grad_norm=29.329, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.332e-04, train_time=1.596 +[gpub022:0/16] 2024-02-01 22:51:15,635 (trainer:737) INFO: 19epoch:train:5801-5900batch: iter_time=1.069e-04, forward_time=0.294, loss_ctc=59.526, loss_att=53.356, acc=0.725, loss=55.207, backward_time=0.403, grad_norm=52.847, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.332e-04, train_time=1.431 +[gpub022:0/16] 2024-02-01 22:53:53,013 (trainer:737) INFO: 19epoch:train:5901-6000batch: 
iter_time=9.792e-05, forward_time=0.293, loss_ctc=51.746, loss_att=47.421, acc=0.748, loss=48.719, backward_time=0.403, grad_norm=37.128, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.332e-04, train_time=1.573 +[gpub022:0/16] 2024-02-01 22:56:21,485 (trainer:737) INFO: 19epoch:train:6001-6100batch: iter_time=9.489e-05, forward_time=0.365, loss_ctc=41.572, loss_att=42.813, acc=0.744, loss=42.441, backward_time=0.441, grad_norm=28.825, clip=100.000, loss_scale=8.619e+33, optim_step_time=0.100, optim0_lr0=2.331e-04, train_time=1.485 +[gpub022:0/16] 2024-02-01 22:58:50,653 (trainer:737) INFO: 19epoch:train:6101-6200batch: iter_time=9.179e-05, forward_time=0.297, loss_ctc=42.208, loss_att=43.454, acc=0.737, loss=43.080, backward_time=0.402, grad_norm=33.202, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.331e-04, train_time=1.491 +[gpub022:0/16] 2024-02-01 23:00:30,910 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub022:0/16] 2024-02-01 23:00:49,703 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 23:00:53,262 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 23:00:53,262 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub022:0/16] 2024-02-01 23:00:53,265 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 23:07:16,856 
(trainer:737) INFO: 19epoch:train:6201-6300batch: iter_time=3.547, forward_time=0.327, loss_ctc=52.137, loss_att=48.683, acc=0.737, loss=49.719, backward_time=0.409, grad_norm=35.836, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.330e-04, train_time=5.062 +[gpub022:0/16] 2024-02-01 23:09:51,954 (trainer:737) INFO: 19epoch:train:6301-6400batch: iter_time=8.310e-05, forward_time=0.337, loss_ctc=57.930, loss_att=56.350, acc=0.731, loss=56.824, backward_time=0.415, grad_norm=37.234, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.330e-04, train_time=1.551 +[gpub022:0/16] 2024-02-01 23:12:23,270 (trainer:737) INFO: 19epoch:train:6401-6500batch: iter_time=8.149e-05, forward_time=0.293, loss_ctc=46.345, loss_att=53.122, acc=0.737, loss=51.089, backward_time=0.402, grad_norm=32.013, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.329e-04, train_time=1.513 +[gpub022:0/16] 2024-02-01 23:14:58,789 (trainer:737) INFO: 19epoch:train:6501-6600batch: iter_time=1.023e-04, forward_time=0.367, loss_ctc=51.850, loss_att=44.316, acc=0.740, loss=46.576, backward_time=0.427, grad_norm=32.806, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.329e-04, train_time=1.555 +[gpub022:0/16] 2024-02-01 23:17:45,545 (trainer:737) INFO: 19epoch:train:6601-6700batch: iter_time=1.029e-04, forward_time=0.328, loss_ctc=50.764, loss_att=53.739, acc=0.719, loss=52.846, backward_time=0.421, grad_norm=34.629, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.329e-04, train_time=1.667 +[gpub022:0/16] 2024-02-01 23:20:19,944 (trainer:737) INFO: 19epoch:train:6701-6800batch: iter_time=1.030e-04, forward_time=0.291, loss_ctc=51.222, loss_att=45.776, acc=0.736, loss=47.410, backward_time=0.403, grad_norm=43.842, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.328e-04, train_time=1.544 +[gpub022:0/16] 2024-02-01 23:23:05,463 (trainer:737) INFO: 
19epoch:train:6801-6900batch: iter_time=1.073e-04, forward_time=0.375, loss_ctc=51.946, loss_att=47.485, acc=0.747, loss=48.823, backward_time=0.424, grad_norm=33.840, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.328e-04, train_time=1.654 +[gpub022:0/16] 2024-02-01 23:26:02,949 (trainer:737) INFO: 19epoch:train:6901-7000batch: iter_time=1.047e-04, forward_time=0.352, loss_ctc=45.561, loss_att=40.273, acc=0.749, loss=41.860, backward_time=0.421, grad_norm=29.911, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.327e-04, train_time=1.775 +[gpub022:0/16] 2024-02-01 23:28:22,010 (trainer:737) INFO: 19epoch:train:7001-7100batch: iter_time=9.962e-05, forward_time=0.289, loss_ctc=51.965, loss_att=48.602, acc=0.732, loss=49.611, backward_time=0.404, grad_norm=37.114, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.327e-04, train_time=1.390 +[gpub022:0/16] 2024-02-01 23:31:37,974 (trainer:737) INFO: 19epoch:train:7101-7200batch: iter_time=3.172e-04, forward_time=0.315, loss_ctc=59.284, loss_att=53.630, acc=0.734, loss=55.327, backward_time=0.420, grad_norm=48.967, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.327e-04, train_time=1.960 +[gpub022:0/16] 2024-02-01 23:33:58,572 (trainer:737) INFO: 19epoch:train:7201-7300batch: iter_time=9.873e-05, forward_time=0.329, loss_ctc=43.206, loss_att=44.113, acc=0.758, loss=43.841, backward_time=0.407, grad_norm=29.093, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.326e-04, train_time=1.406 +[gpub022:0/16] 2024-02-01 23:36:46,883 (trainer:737) INFO: 19epoch:train:7301-7400batch: iter_time=1.065e-04, forward_time=0.327, loss_ctc=42.482, loss_att=43.006, acc=0.738, loss=42.849, backward_time=0.422, grad_norm=29.824, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.326e-04, train_time=1.683 +[gpub022:0/16] 2024-02-01 23:39:18,293 (trainer:737) INFO: 19epoch:train:7401-7500batch: 
iter_time=9.992e-05, forward_time=0.292, loss_ctc=49.252, loss_att=45.141, acc=0.731, loss=46.375, backward_time=0.402, grad_norm=37.824, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.325e-04, train_time=1.514 +[gpub022:0/16] 2024-02-01 23:39:38,321 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub022:0/16] 2024-02-01 23:39:57,266 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-01 23:40:00,832 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-01 23:40:00,832 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub022:0/16] 2024-02-01 23:40:00,835 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-01 23:48:34,092 (trainer:737) INFO: 19epoch:train:7501-7600batch: iter_time=3.935, forward_time=0.380, loss_ctc=45.381, loss_att=43.077, acc=0.759, loss=43.768, backward_time=0.418, grad_norm=31.371, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.325e-04, train_time=5.558 +[gpub022:0/16] 2024-02-01 23:50:55,685 (trainer:737) INFO: 19epoch:train:7601-7700batch: iter_time=1.103e-04, forward_time=0.293, loss_ctc=58.330, loss_att=62.993, acc=0.732, loss=61.594, backward_time=0.407, grad_norm=38.096, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.324e-04, train_time=1.416 +[gpub022:0/16] 2024-02-01 23:52:00,009 
(trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-01 23:53:55,123 (trainer:737) INFO: 19epoch:train:7701-7800batch: iter_time=2.481e-04, forward_time=0.425, loss_ctc=50.606, loss_att=50.939, acc=0.740, loss=50.839, backward_time=0.443, grad_norm=33.913, clip=100.000, loss_scale=7.395e+33, optim_step_time=0.101, optim0_lr0=2.324e-04, train_time=1.794 +[gpub022:0/16] 2024-02-01 23:56:21,214 (trainer:737) INFO: 19epoch:train:7801-7900batch: iter_time=9.679e-05, forward_time=0.290, loss_ctc=46.233, loss_att=41.603, acc=0.758, loss=42.992, backward_time=0.404, grad_norm=29.703, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.324e-04, train_time=1.461 +[gpub022:0/16] 2024-02-01 23:58:44,421 (trainer:737) INFO: 19epoch:train:7901-8000batch: iter_time=1.055e-04, forward_time=0.294, loss_ctc=55.025, loss_att=56.759, acc=0.722, loss=56.239, backward_time=0.404, grad_norm=39.883, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.323e-04, train_time=1.432 +[gpub022:0/16] 2024-02-02 00:01:36,741 (trainer:737) INFO: 19epoch:train:8001-8100batch: iter_time=1.082e-04, forward_time=0.405, loss_ctc=49.995, loss_att=49.129, acc=0.734, loss=49.389, backward_time=0.437, grad_norm=38.397, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.323e-04, train_time=1.723 +[gpub022:0/16] 2024-02-02 00:04:12,911 (trainer:737) INFO: 19epoch:train:8101-8200batch: iter_time=9.360e-05, forward_time=0.291, loss_ctc=50.176, loss_att=48.601, acc=0.749, loss=49.073, backward_time=0.405, grad_norm=32.831, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.322e-04, train_time=1.561 +[gpub022:0/16] 2024-02-02 00:06:43,784 (trainer:737) INFO: 19epoch:train:8201-8300batch: iter_time=8.911e-05, forward_time=0.296, loss_ctc=44.368, loss_att=40.691, acc=0.759, loss=41.794, backward_time=0.414, grad_norm=29.698, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, 
optim0_lr0=2.322e-04, train_time=1.509 +[gpub022:0/16] 2024-02-02 00:09:49,065 (trainer:737) INFO: 19epoch:train:8301-8400batch: iter_time=2.874e-04, forward_time=0.374, loss_ctc=59.843, loss_att=54.425, acc=0.728, loss=56.050, backward_time=0.437, grad_norm=48.741, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.322e-04, train_time=1.852 +[gpub022:0/16] 2024-02-02 00:12:21,959 (trainer:737) INFO: 19epoch:train:8401-8500batch: iter_time=8.975e-05, forward_time=0.292, loss_ctc=51.630, loss_att=48.666, acc=0.757, loss=49.555, backward_time=0.403, grad_norm=32.220, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.321e-04, train_time=1.529 +[gpub022:0/16] 2024-02-02 00:15:07,180 (trainer:737) INFO: 19epoch:train:8501-8600batch: iter_time=9.041e-05, forward_time=0.347, loss_ctc=41.085, loss_att=43.370, acc=0.746, loss=42.684, backward_time=0.453, grad_norm=30.203, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.321e-04, train_time=1.652 +[gpub022:0/16] 2024-02-02 00:17:33,722 (trainer:737) INFO: 19epoch:train:8601-8700batch: iter_time=1.039e-04, forward_time=0.321, loss_ctc=42.174, loss_att=43.152, acc=0.746, loss=42.859, backward_time=0.414, grad_norm=32.490, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.320e-04, train_time=1.465 +[gpub022:0/16] 2024-02-02 00:19:27,910 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub022:0/16] 2024-02-02 00:19:47,120 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 00:19:51,259 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 00:19:51,259 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub022:0/16] 2024-02-02 00:19:51,263 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 00:26:32,398 (trainer:737) INFO: 19epoch:train:8701-8800batch: iter_time=3.680, forward_time=0.288, loss_ctc=51.363, loss_att=47.729, acc=0.748, loss=48.819, backward_time=0.403, grad_norm=34.740, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.320e-04, train_time=5.386 +[gpub022:0/16] 2024-02-02 00:28:43,713 (trainer:737) INFO: 19epoch:train:8801-8900batch: iter_time=9.261e-05, forward_time=0.293, loss_ctc=57.141, loss_att=56.897, acc=0.732, loss=56.971, backward_time=0.408, grad_norm=39.107, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.319e-04, train_time=1.313 +[gpub022:0/16] 2024-02-02 00:31:41,232 (trainer:737) INFO: 19epoch:train:8901-9000batch: iter_time=8.437e-05, forward_time=0.400, loss_ctc=45.903, loss_att=52.659, acc=0.738, loss=50.632, backward_time=0.466, grad_norm=35.977, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.319e-04, train_time=1.775 +[gpub022:0/16] 2024-02-02 00:34:18,894 (trainer:737) INFO: 
19epoch:train:9001-9100batch: iter_time=8.357e-05, forward_time=0.289, loss_ctc=51.539, loss_att=44.647, acc=0.741, loss=46.715, backward_time=0.402, grad_norm=34.276, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.319e-04, train_time=1.576 +[gpub022:0/16] 2024-02-02 00:36:34,821 (trainer:737) INFO: 19epoch:train:9101-9200batch: iter_time=7.956e-05, forward_time=0.291, loss_ctc=50.207, loss_att=53.183, acc=0.721, loss=52.291, backward_time=0.404, grad_norm=34.317, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.318e-04, train_time=1.360 +[gpub022:0/16] 2024-02-02 00:39:24,678 (trainer:737) INFO: 19epoch:train:9201-9300batch: iter_time=7.909e-05, forward_time=0.377, loss_ctc=51.097, loss_att=46.234, acc=0.734, loss=47.693, backward_time=0.462, grad_norm=46.858, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.318e-04, train_time=1.698 +[gpub022:0/16] 2024-02-02 00:41:56,859 (trainer:737) INFO: 19epoch:train:9301-9400batch: iter_time=8.178e-05, forward_time=0.290, loss_ctc=51.869, loss_att=47.495, acc=0.746, loss=48.807, backward_time=0.404, grad_norm=35.881, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.317e-04, train_time=1.522 +[gpub022:0/16] 2024-02-02 00:44:29,128 (trainer:737) INFO: 19epoch:train:9401-9500batch: iter_time=7.707e-05, forward_time=0.289, loss_ctc=45.361, loss_att=40.464, acc=0.750, loss=41.933, backward_time=0.402, grad_norm=30.943, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.317e-04, train_time=1.522 +[gpub022:0/16] 2024-02-02 00:47:00,242 (trainer:737) INFO: 19epoch:train:9501-9600batch: iter_time=9.632e-05, forward_time=0.367, loss_ctc=50.758, loss_att=48.663, acc=0.731, loss=49.291, backward_time=0.439, grad_norm=36.297, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.317e-04, train_time=1.511 +[gpub022:0/16] 2024-02-02 00:49:40,115 (trainer:737) INFO: 19epoch:train:9601-9700batch: 
iter_time=1.083e-04, forward_time=0.292, loss_ctc=58.793, loss_att=54.433, acc=0.733, loss=55.741, backward_time=0.404, grad_norm=48.878, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.316e-04, train_time=1.598 +[gpub022:0/16] 2024-02-02 00:52:10,690 (trainer:737) INFO: 19epoch:train:9701-9800batch: iter_time=8.419e-05, forward_time=0.288, loss_ctc=43.335, loss_att=43.843, acc=0.759, loss=43.691, backward_time=0.400, grad_norm=28.884, clip=100.000, loss_scale=8.152e+33, optim_step_time=0.093, optim0_lr0=2.316e-04, train_time=1.506 +[gpub022:0/16] 2024-02-02 00:54:25,154 (trainer:737) INFO: 19epoch:train:9801-9900batch: iter_time=8.725e-05, forward_time=0.293, loss_ctc=42.199, loss_att=42.889, acc=0.740, loss=42.682, backward_time=0.403, grad_norm=30.880, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.315e-04, train_time=1.344 +[gpub022:0/16] 2024-02-02 00:57:20,942 (trainer:737) INFO: 19epoch:train:9901-10000batch: iter_time=1.089e-04, forward_time=0.355, loss_ctc=48.469, loss_att=45.492, acc=0.728, loss=46.385, backward_time=0.452, grad_norm=37.311, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.315e-04, train_time=1.758 +[gpub022:0/16] 2024-02-02 00:57:41,252 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub022:0/16] 2024-02-02 00:57:59,986 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 00:58:03,504 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 00:58:03,504 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub022:0/16] 2024-02-02 00:58:03,507 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 01:05:45,389 (trainer:737) INFO: 19epoch:train:10001-10100batch: iter_time=3.636, forward_time=0.290, loss_ctc=45.686, loss_att=42.376, acc=0.762, loss=43.369, backward_time=0.403, grad_norm=31.223, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.314e-04, train_time=5.044 +[gpub022:0/16] 2024-02-02 01:08:20,108 (trainer:737) INFO: 19epoch:train:10101-10200batch: iter_time=8.690e-05, forward_time=0.315, loss_ctc=58.315, loss_att=62.051, acc=0.734, loss=60.930, backward_time=0.413, grad_norm=39.469, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.314e-04, train_time=1.546 +[gpub022:0/16] 2024-02-02 01:11:05,161 (trainer:737) INFO: 19epoch:train:10201-10300batch: iter_time=8.551e-05, forward_time=0.345, loss_ctc=49.361, loss_att=49.245, acc=0.745, loss=49.280, backward_time=0.458, grad_norm=31.793, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.314e-04, train_time=1.651 +[gpub022:0/16] 2024-02-02 01:13:40,938 (trainer:737) 
INFO: 19epoch:train:10301-10400batch: iter_time=9.123e-05, forward_time=0.289, loss_ctc=45.798, loss_att=40.588, acc=0.762, loss=42.151, backward_time=0.403, grad_norm=29.830, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.313e-04, train_time=1.558 +[gpub022:0/16] 2024-02-02 01:16:01,839 (trainer:737) INFO: 19epoch:train:10401-10500batch: iter_time=9.357e-05, forward_time=0.298, loss_ctc=54.735, loss_att=56.469, acc=0.722, loss=55.949, backward_time=0.412, grad_norm=39.090, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.313e-04, train_time=1.407 +[gpub022:0/16] 2024-02-02 01:18:47,560 (trainer:737) INFO: 19epoch:train:10501-10600batch: iter_time=9.121e-05, forward_time=0.382, loss_ctc=48.835, loss_att=48.327, acc=0.738, loss=48.479, backward_time=0.440, grad_norm=39.067, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.312e-04, train_time=1.659 +[gpub022:0/16] 2024-02-02 01:21:19,461 (trainer:737) INFO: 19epoch:train:10601-10700batch: iter_time=1.019e-04, forward_time=0.291, loss_ctc=49.721, loss_att=47.693, acc=0.751, loss=48.301, backward_time=0.403, grad_norm=32.654, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.312e-04, train_time=1.519 +[gpub022:0/16] 2024-02-02 01:22:25,271 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-02 01:24:02,530 (trainer:737) INFO: 19epoch:train:10701-10800batch: iter_time=1.068e-04, forward_time=0.289, loss_ctc=43.977, loss_att=40.406, acc=0.760, loss=41.477, backward_time=0.401, grad_norm=29.811, clip=100.000, loss_scale=7.500e+33, optim_step_time=0.093, optim0_lr0=2.312e-04, train_time=1.631 +[gpub022:0/16] 2024-02-02 01:26:39,391 (trainer:737) INFO: 19epoch:train:10801-10900batch: iter_time=9.522e-05, forward_time=0.402, loss_ctc=60.080, loss_att=55.240, acc=0.732, loss=56.692, backward_time=0.449, grad_norm=46.058, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.311e-04, train_time=1.568 +[gpub022:0/16] 2024-02-02 01:29:02,759 (trainer:737) INFO: 19epoch:train:10901-11000batch: iter_time=8.972e-05, forward_time=0.293, loss_ctc=50.718, loss_att=47.466, acc=0.761, loss=48.442, backward_time=0.406, grad_norm=34.293, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.311e-04, train_time=1.433 +[gpub022:0/16] 2024-02-02 01:31:38,984 (trainer:737) INFO: 19epoch:train:11001-11100batch: iter_time=9.368e-05, forward_time=0.291, loss_ctc=41.159, loss_att=42.735, acc=0.748, loss=42.262, backward_time=0.399, grad_norm=29.577, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.310e-04, train_time=1.562 +[gpub022:0/16] 2024-02-02 01:34:01,792 (trainer:737) INFO: 19epoch:train:11101-11200batch: iter_time=8.723e-05, forward_time=0.357, loss_ctc=41.961, loss_att=42.723, acc=0.747, loss=42.494, backward_time=0.433, grad_norm=31.941, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=2.310e-04, train_time=1.428 +[gpub022:0/16] 2024-02-02 01:35:48,615 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub022:0/16] 2024-02-02 01:36:07,356 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 01:36:10,886 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 01:36:10,886 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub022:0/16] 2024-02-02 01:36:10,889 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 01:42:24,937 (trainer:737) INFO: 19epoch:train:11201-11300batch: iter_time=3.430, forward_time=0.315, loss_ctc=51.072, loss_att=48.087, acc=0.747, loss=48.983, backward_time=0.416, grad_norm=35.399, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.310e-04, train_time=5.031 +[gpub022:0/16] 2024-02-02 01:44:41,501 (trainer:737) INFO: 19epoch:train:11301-11400batch: iter_time=8.714e-05, forward_time=0.292, loss_ctc=57.495, loss_att=54.693, acc=0.747, loss=55.534, backward_time=0.407, grad_norm=37.999, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.309e-04, train_time=1.365 +[gpub022:0/16] 2024-02-02 01:47:20,783 (trainer:737) INFO: 19epoch:train:11401-11500batch: iter_time=1.027e-04, forward_time=0.365, loss_ctc=45.680, loss_att=52.373, acc=0.744, loss=50.365, backward_time=0.461, grad_norm=32.650, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.309e-04, train_time=1.593 +[gpub022:0/16] 2024-02-02 01:50:03,670 
(trainer:737) INFO: 19epoch:train:11501-11600batch: iter_time=1.215e-04, forward_time=0.291, loss_ctc=51.076, loss_att=44.810, acc=0.751, loss=46.690, backward_time=0.405, grad_norm=32.891, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.308e-04, train_time=1.629 +[gpub022:0/16] 2024-02-02 01:52:23,515 (trainer:737) INFO: 19epoch:train:11601-11700batch: iter_time=1.137e-04, forward_time=0.292, loss_ctc=49.658, loss_att=52.312, acc=0.734, loss=51.516, backward_time=0.405, grad_norm=33.534, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.308e-04, train_time=1.398 +[gpub022:0/16] 2024-02-02 01:55:10,867 (trainer:737) INFO: 19epoch:train:11701-11800batch: iter_time=1.153e-04, forward_time=0.410, loss_ctc=50.033, loss_att=46.712, acc=0.733, loss=47.708, backward_time=0.434, grad_norm=41.319, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.307e-04, train_time=1.673 +[gpub022:0/16] 2024-02-02 01:56:52,082 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-02 01:57:51,090 (trainer:737) INFO: 19epoch:train:11801-11900batch: iter_time=1.004e-04, forward_time=0.290, loss_ctc=52.017, loss_att=49.650, acc=0.755, loss=50.360, backward_time=0.404, grad_norm=34.793, clip=100.000, loss_scale=4.012e+33, optim_step_time=0.093, optim0_lr0=2.307e-04, train_time=1.602 +[gpub022:0/16] 2024-02-02 02:00:10,246 (trainer:737) INFO: 19epoch:train:11901-12000batch: iter_time=9.825e-05, forward_time=0.290, loss_ctc=45.123, loss_att=42.005, acc=0.752, loss=42.940, backward_time=0.404, grad_norm=28.908, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.307e-04, train_time=1.391 +[gpub022:0/16] 2024-02-02 02:03:02,956 (trainer:737) INFO: 19epoch:train:12001-12100batch: iter_time=9.953e-05, forward_time=0.387, loss_ctc=50.842, loss_att=49.167, acc=0.738, loss=49.670, backward_time=0.450, grad_norm=35.119, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=2.306e-04, train_time=1.727 +[gpub022:0/16] 2024-02-02 02:05:35,787 (trainer:737) INFO: 19epoch:train:12101-12200batch: iter_time=9.826e-05, forward_time=0.292, loss_ctc=58.800, loss_att=54.413, acc=0.743, loss=55.729, backward_time=0.405, grad_norm=53.766, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.306e-04, train_time=1.528 +[gpub022:0/16] 2024-02-02 02:08:16,019 (trainer:737) INFO: 19epoch:train:12201-12300batch: iter_time=1.005e-04, forward_time=0.309, loss_ctc=42.671, loss_att=44.475, acc=0.763, loss=43.934, backward_time=0.402, grad_norm=28.787, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.305e-04, train_time=1.602 +[gpub022:0/16] 2024-02-02 02:10:46,903 (trainer:737) INFO: 19epoch:train:12301-12400batch: iter_time=1.068e-04, forward_time=0.358, loss_ctc=42.285, loss_att=42.604, acc=0.747, loss=42.508, backward_time=0.457, grad_norm=29.435, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.305e-04, train_time=1.509 +[gpub022:0/16] 2024-02-02 
02:13:27,147 (trainer:737) INFO: 19epoch:train:12401-12500batch: iter_time=9.812e-05, forward_time=0.292, loss_ctc=48.702, loss_att=44.620, acc=0.743, loss=45.845, backward_time=0.403, grad_norm=37.152, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.305e-04, train_time=1.602 +[gpub022:0/16] 2024-02-02 02:13:47,332 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub022:0/16] 2024-02-02 02:14:06,155 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 02:14:09,677 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 02:14:09,677 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub022:0/16] 2024-02-02 02:14:09,680 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 02:21:30,013 (trainer:737) INFO: 19epoch:train:12501-12600batch: iter_time=3.287, forward_time=0.288, loss_ctc=45.028, loss_att=41.191, acc=0.763, loss=42.342, backward_time=0.404, grad_norm=30.494, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.304e-04, train_time=4.828 +[gpub022:0/16] 2024-02-02 02:24:37,682 (trainer:737) INFO: 19epoch:train:12601-12700batch: iter_time=8.689e-05, forward_time=0.390, loss_ctc=58.026, loss_att=61.264, acc=0.735, loss=60.293, backward_time=0.457, grad_norm=37.939, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, 
optim0_lr0=2.304e-04, train_time=1.877 +[gpub022:0/16] 2024-02-02 02:24:58,653 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-02 02:26:53,015 (trainer:737) INFO: 19epoch:train:12701-12800batch: iter_time=8.733e-05, forward_time=0.291, loss_ctc=49.704, loss_att=49.580, acc=0.746, loss=49.617, backward_time=0.404, grad_norm=31.014, clip=100.000, loss_scale=1.469e+33, optim_step_time=0.093, optim0_lr0=2.303e-04, train_time=1.353 +[gpub022:0/16] 2024-02-02 02:29:25,994 (trainer:737) INFO: 19epoch:train:12801-12900batch: iter_time=9.223e-05, forward_time=0.290, loss_ctc=46.068, loss_att=40.563, acc=0.761, loss=42.215, backward_time=0.404, grad_norm=28.964, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.303e-04, train_time=1.530 +[gpub022:0/16] 2024-02-02 02:32:13,656 (trainer:737) INFO: 19epoch:train:12901-13000batch: iter_time=9.104e-05, forward_time=0.351, loss_ctc=54.515, loss_att=56.173, acc=0.724, loss=55.676, backward_time=0.446, grad_norm=38.477, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.100, optim0_lr0=2.303e-04, train_time=1.677 +[gpub022:0/16] 2024-02-02 02:34:43,490 (trainer:737) INFO: 19epoch:train:13001-13100batch: iter_time=8.844e-05, forward_time=0.298, loss_ctc=48.822, loss_att=47.594, acc=0.738, loss=47.963, backward_time=0.406, grad_norm=38.246, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.302e-04, train_time=1.498 +[gpub022:0/16] 2024-02-02 02:37:12,306 (trainer:737) INFO: 19epoch:train:13101-13200batch: iter_time=1.005e-04, forward_time=0.291, loss_ctc=49.627, loss_att=47.959, acc=0.750, loss=48.459, backward_time=0.406, grad_norm=32.737, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.302e-04, train_time=1.488 +[gpub022:0/16] 2024-02-02 02:40:00,962 (trainer:737) INFO: 19epoch:train:13201-13300batch: iter_time=7.429e-04, forward_time=0.339, loss_ctc=44.288, loss_att=40.151, acc=0.760, loss=41.392, 
backward_time=0.453, grad_norm=30.106, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.100, optim0_lr0=2.301e-04, train_time=1.686 +[gpub022:0/16] 2024-02-02 02:42:16,236 (trainer:737) INFO: 19epoch:train:13301-13400batch: iter_time=7.070e-04, forward_time=0.322, loss_ctc=59.059, loss_att=53.551, acc=0.732, loss=55.203, backward_time=0.411, grad_norm=49.476, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.097, optim0_lr0=2.301e-04, train_time=1.353 +[gpub022:0/16] 2024-02-02 02:45:08,521 (trainer:737) INFO: 19epoch:train:13401-13500batch: iter_time=9.401e-05, forward_time=0.292, loss_ctc=50.908, loss_att=47.357, acc=0.760, loss=48.422, backward_time=0.404, grad_norm=36.621, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.301e-04, train_time=1.723 +[gpub022:0/16] 2024-02-02 02:47:37,563 (trainer:737) INFO: 19epoch:train:13501-13600batch: iter_time=8.361e-05, forward_time=0.367, loss_ctc=41.012, loss_att=42.524, acc=0.750, loss=42.070, backward_time=0.430, grad_norm=29.083, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.101, optim0_lr0=2.300e-04, train_time=1.490 +[gpub022:0/16] 2024-02-02 02:50:01,159 (trainer:737) INFO: 19epoch:train:13601-13700batch: iter_time=5.522e-04, forward_time=0.334, loss_ctc=41.671, loss_att=42.527, acc=0.750, loss=42.270, backward_time=0.419, grad_norm=32.174, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.095, optim0_lr0=2.300e-04, train_time=1.436 +[gpub022:0/16] 2024-02-02 02:51:45,083 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub022:0/16] 2024-02-02 02:52:04,115 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 02:52:07,824 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 02:52:07,824 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub022:0/16] 2024-02-02 02:52:07,828 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 02:58:04,910 (trainer:737) INFO: 19epoch:train:13701-13800batch: iter_time=3.222, forward_time=0.291, loss_ctc=50.393, loss_att=47.101, acc=0.751, loss=48.089, backward_time=0.404, grad_norm=34.664, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.299e-04, train_time=4.837 +[gpub022:0/16] 2024-02-02 03:00:43,984 (trainer:737) INFO: 19epoch:train:13801-13900batch: iter_time=2.043e-04, forward_time=0.398, loss_ctc=56.242, loss_att=58.294, acc=0.730, loss=57.678, backward_time=0.436, grad_norm=39.565, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.096, optim0_lr0=2.299e-04, train_time=1.590 +[gpub022:0/16] 2024-02-02 03:03:19,032 (trainer:737) INFO: 19epoch:train:13901-14000batch: iter_time=8.807e-05, forward_time=0.304, loss_ctc=45.497, loss_att=52.649, acc=0.740, loss=50.503, backward_time=0.411, grad_norm=32.818, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.094, optim0_lr0=2.299e-04, train_time=1.551 +[gpub022:0/16] 2024-02-02 03:05:54,300 (trainer:737) 
INFO: 19epoch:train:14001-14100batch: iter_time=9.751e-05, forward_time=0.289, loss_ctc=50.863, loss_att=44.970, acc=0.740, loss=46.738, backward_time=0.403, grad_norm=34.511, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.094, optim0_lr0=2.298e-04, train_time=1.553 +[gpub022:0/16] 2024-02-02 03:08:56,385 (trainer:737) INFO: 19epoch:train:14101-14200batch: iter_time=1.484e-04, forward_time=0.385, loss_ctc=50.070, loss_att=53.098, acc=0.724, loss=52.190, backward_time=0.427, grad_norm=35.970, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.102, optim0_lr0=2.298e-04, train_time=1.820 +[gpub022:0/16] 2024-02-02 03:11:45,919 (trainer:737) INFO: 19epoch:train:14201-14300batch: iter_time=9.874e-05, forward_time=0.287, loss_ctc=50.190, loss_att=46.021, acc=0.737, loss=47.272, backward_time=0.401, grad_norm=42.176, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.297e-04, train_time=1.696 +[gpub022:0/16] 2024-02-02 03:14:37,252 (trainer:737) INFO: 19epoch:train:14301-14400batch: iter_time=0.001, forward_time=0.392, loss_ctc=51.394, loss_att=47.500, acc=0.748, loss=48.669, backward_time=0.455, grad_norm=34.089, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.098, optim0_lr0=2.297e-04, train_time=1.713 +[gpub022:0/16] 2024-02-02 03:17:32,916 (trainer:737) INFO: 19epoch:train:14401-14500batch: iter_time=8.819e-05, forward_time=0.289, loss_ctc=44.962, loss_att=40.311, acc=0.751, loss=41.706, backward_time=0.401, grad_norm=29.958, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.297e-04, train_time=1.756 +[gpub022:0/16] 2024-02-02 03:19:53,495 (trainer:737) INFO: 19epoch:train:14501-14600batch: iter_time=9.690e-05, forward_time=0.360, loss_ctc=51.443, loss_att=49.503, acc=0.731, loss=50.085, backward_time=0.429, grad_norm=37.968, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.099, optim0_lr0=2.296e-04, train_time=1.406 +[gpub022:0/16] 2024-02-02 03:22:37,177 (trainer:737) INFO: 
19epoch:train:14601-14700batch: iter_time=4.917e-04, forward_time=0.314, loss_ctc=58.855, loss_att=54.946, acc=0.734, loss=56.118, backward_time=0.415, grad_norm=49.948, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.097, optim0_lr0=2.296e-04, train_time=1.636 +[gpub022:0/16] 2024-02-02 03:25:23,829 (trainer:737) INFO: 19epoch:train:14701-14800batch: iter_time=1.072e-04, forward_time=0.328, loss_ctc=42.524, loss_att=43.791, acc=0.761, loss=43.411, backward_time=0.419, grad_norm=28.581, clip=100.000, loss_scale=2.414e+33, optim_step_time=0.102, optim0_lr0=2.295e-04, train_time=1.666 +[gpub022:0/16] 2024-02-02 03:27:57,943 (trainer:737) INFO: 19epoch:train:14801-14900batch: iter_time=3.013e-04, forward_time=0.333, loss_ctc=42.092, loss_att=43.417, acc=0.741, loss=43.019, backward_time=0.424, grad_norm=30.493, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.295e-04, train_time=1.541 +[gpub022:0/16] 2024-02-02 03:30:31,927 (trainer:737) INFO: 19epoch:train:14901-15000batch: iter_time=1.003e-04, forward_time=0.321, loss_ctc=48.714, loss_att=45.655, acc=0.729, loss=46.573, backward_time=0.414, grad_norm=38.917, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.294e-04, train_time=1.540 +[gpub022:0/16] 2024-02-02 04:08:16,350 (trainer:343) INFO: 19epoch results: [train] iter_time=0.267, forward_time=0.325, loss_ctc=50.244, loss_att=48.350, acc=0.741, loss=48.918, backward_time=0.419, grad_norm=36.355, clip=100.000, loss_scale=5.218e+33, optim_step_time=0.096, optim0_lr0=2.325e-04, train_time=1.816, time=7 hours, 34 minutes and 30.12 seconds, total_count=315000, gpu_max_cached_mem_GB=41.891, [valid] loss_ctc=40.729, cer_ctc=0.208, loss_att=43.390, acc=0.656, cer=0.299, wer=0.993, loss=42.592, time=37 minutes and 19.77 seconds, total_count=98091, gpu_max_cached_mem_GB=41.891 +[gpub022:0/16] 2024-02-02 04:08:26,811 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub022:0/16] 2024-02-02 
04:08:26,975 (trainer:272) INFO: 20/45epoch started. Estimated time to finish: 1 week, 2 days and 7 hours +[gpub022:0/16] 2024-02-02 04:08:27,170 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub022:0/16] 2024-02-02 04:08:45,084 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 04:08:48,466 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 04:08:48,466 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub022:0/16] 2024-02-02 04:08:48,470 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 04:15:25,586 (trainer:737) INFO: 20epoch:train:1-100batch: iter_time=2.754, forward_time=0.328, loss_ctc=43.687, loss_att=44.593, acc=0.730, loss=44.321, backward_time=0.406, grad_norm=32.312, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.294e-04, train_time=4.185 +[gpub022:0/16] 2024-02-02 04:18:04,680 (trainer:737) INFO: 20epoch:train:101-200batch: iter_time=6.730e-04, forward_time=0.354, loss_ctc=44.083, loss_att=42.887, acc=0.746, loss=43.246, backward_time=0.411, grad_norm=32.758, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.294e-04, train_time=1.590 +[gpub022:0/16] 2024-02-02 04:20:42,961 (trainer:737) INFO: 20epoch:train:201-300batch: iter_time=1.989e-04, forward_time=0.368, loss_ctc=52.078, loss_att=47.727, acc=0.743, 
loss=49.032, backward_time=0.425, grad_norm=36.511, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.293e-04, train_time=1.583 +[gpub022:0/16] 2024-02-02 04:23:20,699 (trainer:737) INFO: 20epoch:train:301-400batch: iter_time=1.058e-04, forward_time=0.312, loss_ctc=59.159, loss_att=53.034, acc=0.736, loss=54.871, backward_time=0.424, grad_norm=40.122, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.293e-04, train_time=1.577 +[gpub022:0/16] 2024-02-02 04:26:02,099 (trainer:737) INFO: 20epoch:train:401-500batch: iter_time=5.419e-04, forward_time=0.328, loss_ctc=52.431, loss_att=50.398, acc=0.753, loss=51.008, backward_time=0.454, grad_norm=40.787, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.292e-04, train_time=1.611 +[gpub022:0/16] 2024-02-02 04:28:35,659 (trainer:737) INFO: 20epoch:train:501-600batch: iter_time=4.666e-04, forward_time=0.350, loss_ctc=49.950, loss_att=45.587, acc=0.738, loss=46.896, backward_time=0.411, grad_norm=33.162, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.292e-04, train_time=1.535 +[gpub022:0/16] 2024-02-02 04:31:15,787 (trainer:737) INFO: 20epoch:train:601-700batch: iter_time=9.214e-04, forward_time=0.335, loss_ctc=60.390, loss_att=58.162, acc=0.721, loss=58.830, backward_time=0.442, grad_norm=41.871, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.292e-04, train_time=1.602 +[gpub022:0/16] 2024-02-02 04:33:56,489 (trainer:737) INFO: 20epoch:train:701-800batch: iter_time=4.174e-04, forward_time=0.338, loss_ctc=51.298, loss_att=58.457, acc=0.717, loss=56.310, backward_time=0.415, grad_norm=37.837, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.291e-04, train_time=1.607 +[gpub022:0/16] 2024-02-02 04:36:40,968 (trainer:737) INFO: 20epoch:train:801-900batch: iter_time=4.293e-04, forward_time=0.343, loss_ctc=56.273, loss_att=59.405, acc=0.721, loss=58.465, backward_time=0.445, grad_norm=37.346, 
clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.291e-04, train_time=1.644 +[gpub022:0/16] 2024-02-02 04:39:15,152 (trainer:737) INFO: 20epoch:train:901-1000batch: iter_time=5.310e-04, forward_time=0.333, loss_ctc=66.196, loss_att=60.288, acc=0.711, loss=62.061, backward_time=0.418, grad_norm=42.392, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.290e-04, train_time=1.541 +[gpub022:0/16] 2024-02-02 04:42:01,412 (trainer:737) INFO: 20epoch:train:1001-1100batch: iter_time=6.978e-04, forward_time=0.346, loss_ctc=52.569, loss_att=47.068, acc=0.736, loss=48.718, backward_time=0.423, grad_norm=34.898, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.290e-04, train_time=1.664 +[gpub022:0/16] 2024-02-02 04:44:33,015 (trainer:737) INFO: 20epoch:train:1101-1200batch: iter_time=3.511e-04, forward_time=0.322, loss_ctc=54.076, loss_att=52.745, acc=0.718, loss=53.144, backward_time=0.416, grad_norm=39.824, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.290e-04, train_time=1.514 +[gpub022:0/16] 2024-02-02 04:46:11,919 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub022:0/16] 2024-02-02 04:46:30,803 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 04:46:34,345 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 04:46:34,345 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub022:0/16] 2024-02-02 04:46:34,435 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 04:52:17,100 (trainer:737) INFO: 20epoch:train:1201-1300batch: iter_time=3.074, forward_time=0.297, loss_ctc=47.802, loss_att=45.092, acc=0.755, loss=45.905, backward_time=0.406, grad_norm=30.654, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.289e-04, train_time=4.642 +[gpub022:0/16] 2024-02-02 04:54:30,176 (trainer:737) INFO: 20epoch:train:1301-1400batch: iter_time=8.401e-05, forward_time=0.296, loss_ctc=46.352, loss_att=48.871, acc=0.748, loss=48.115, backward_time=0.411, grad_norm=33.016, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.289e-04, train_time=1.331 +[gpub022:0/16] 2024-02-02 04:57:07,235 (trainer:737) INFO: 20epoch:train:1401-1500batch: iter_time=3.670e-04, forward_time=0.379, loss_ctc=46.567, loss_att=41.687, acc=0.763, loss=43.151, backward_time=0.432, grad_norm=32.188, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.288e-04, train_time=1.570 +[gpub022:0/16] 2024-02-02 04:59:45,039 (trainer:737) INFO: 
20epoch:train:1501-1600batch: iter_time=9.269e-05, forward_time=0.324, loss_ctc=49.825, loss_att=51.840, acc=0.738, loss=51.235, backward_time=0.410, grad_norm=36.161, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.288e-04, train_time=1.577 +[gpub022:0/16] 2024-02-02 05:01:58,765 (trainer:737) INFO: 20epoch:train:1601-1700batch: iter_time=1.052e-04, forward_time=0.295, loss_ctc=55.836, loss_att=53.794, acc=0.753, loss=54.407, backward_time=0.408, grad_norm=38.574, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.288e-04, train_time=1.338 +[gpub022:0/16] 2024-02-02 05:04:31,527 (trainer:737) INFO: 20epoch:train:1701-1800batch: iter_time=6.492e-04, forward_time=0.368, loss_ctc=52.106, loss_att=47.068, acc=0.753, loss=48.580, backward_time=0.439, grad_norm=34.419, clip=100.000, loss_scale=4.829e+33, optim_step_time=0.099, optim0_lr0=2.287e-04, train_time=1.528 +[gpub022:0/16] 2024-02-02 05:07:11,072 (trainer:737) INFO: 20epoch:train:1801-1900batch: iter_time=9.409e-05, forward_time=0.320, loss_ctc=51.681, loss_att=51.575, acc=0.746, loss=51.607, backward_time=0.424, grad_norm=33.245, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.287e-04, train_time=1.595 +[gpub022:0/16] 2024-02-02 05:09:30,155 (trainer:737) INFO: 20epoch:train:1901-2000batch: iter_time=1.030e-04, forward_time=0.292, loss_ctc=56.920, loss_att=58.719, acc=0.722, loss=58.180, backward_time=0.405, grad_norm=40.448, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.286e-04, train_time=1.390 +[gpub022:0/16] 2024-02-02 05:11:51,913 (trainer:737) INFO: 20epoch:train:2001-2100batch: iter_time=4.340e-04, forward_time=0.300, loss_ctc=51.268, loss_att=59.486, acc=0.732, loss=57.021, backward_time=0.419, grad_norm=34.757, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.286e-04, train_time=1.418 +[gpub022:0/16] 2024-02-02 05:14:34,232 (trainer:737) INFO: 20epoch:train:2101-2200batch: 
iter_time=8.974e-05, forward_time=0.353, loss_ctc=52.717, loss_att=55.029, acc=0.739, loss=54.335, backward_time=0.438, grad_norm=32.794, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.286e-04, train_time=1.623 +[gpub022:0/16] 2024-02-02 05:16:58,035 (trainer:737) INFO: 20epoch:train:2201-2300batch: iter_time=8.969e-05, forward_time=0.318, loss_ctc=64.003, loss_att=57.765, acc=0.733, loss=59.636, backward_time=0.420, grad_norm=40.081, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.285e-04, train_time=1.438 +[gpub022:0/16] 2024-02-02 05:19:21,589 (trainer:737) INFO: 20epoch:train:2301-2400batch: iter_time=3.113e-04, forward_time=0.293, loss_ctc=50.893, loss_att=49.111, acc=0.737, loss=49.645, backward_time=0.412, grad_norm=32.866, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.285e-04, train_time=1.435 +[gpub022:0/16] 2024-02-02 05:21:49,831 (trainer:737) INFO: 20epoch:train:2401-2500batch: iter_time=1.009e-04, forward_time=0.335, loss_ctc=59.208, loss_att=58.950, acc=0.729, loss=59.027, backward_time=0.456, grad_norm=41.123, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.284e-04, train_time=1.482 +[gpub022:0/16] 2024-02-02 05:22:09,859 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub022:0/16] 2024-02-02 05:22:28,533 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 05:22:32,149 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 05:22:32,149 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub022:0/16] 2024-02-02 05:22:32,152 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 05:29:07,951 (trainer:737) INFO: 20epoch:train:2501-2600batch: iter_time=2.918, forward_time=0.305, loss_ctc=42.802, loss_att=44.127, acc=0.750, loss=43.730, backward_time=0.401, grad_norm=30.235, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.284e-04, train_time=4.381 +[gpub022:0/16] 2024-02-02 05:31:30,380 (trainer:737) INFO: 20epoch:train:2601-2700batch: iter_time=3.768e-04, forward_time=0.302, loss_ctc=42.909, loss_att=42.214, acc=0.755, loss=42.422, backward_time=0.411, grad_norm=31.105, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.284e-04, train_time=1.424 +[gpub022:0/16] 2024-02-02 05:33:57,265 (trainer:737) INFO: 20epoch:train:2701-2800batch: iter_time=8.778e-05, forward_time=0.295, loss_ctc=49.604, loss_att=45.661, acc=0.758, loss=46.844, backward_time=0.411, grad_norm=34.217, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.283e-04, train_time=1.468 +[gpub022:0/16] 2024-02-02 05:36:31,000 (trainer:737) INFO: 
20epoch:train:2801-2900batch: iter_time=1.015e-04, forward_time=0.372, loss_ctc=55.707, loss_att=51.377, acc=0.752, loss=52.676, backward_time=0.433, grad_norm=37.090, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.283e-04, train_time=1.537 +[gpub022:0/16] 2024-02-02 05:38:51,900 (trainer:737) INFO: 20epoch:train:2901-3000batch: iter_time=1.093e-04, forward_time=0.314, loss_ctc=50.957, loss_att=51.052, acc=0.759, loss=51.023, backward_time=0.408, grad_norm=33.149, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.283e-04, train_time=1.409 +[gpub022:0/16] 2024-02-02 05:41:15,325 (trainer:737) INFO: 20epoch:train:3001-3100batch: iter_time=4.406e-04, forward_time=0.294, loss_ctc=48.908, loss_att=45.431, acc=0.749, loss=46.474, backward_time=0.408, grad_norm=30.674, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.282e-04, train_time=1.434 +[gpub022:0/16] 2024-02-02 05:43:53,774 (trainer:737) INFO: 20epoch:train:3101-3200batch: iter_time=9.799e-05, forward_time=0.333, loss_ctc=57.179, loss_att=57.449, acc=0.733, loss=57.368, backward_time=0.463, grad_norm=37.293, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.282e-04, train_time=1.584 +[gpub022:0/16] 2024-02-02 05:46:26,273 (trainer:737) INFO: 20epoch:train:3201-3300batch: iter_time=1.114e-04, forward_time=0.310, loss_ctc=50.524, loss_att=57.111, acc=0.735, loss=55.135, backward_time=0.410, grad_norm=38.142, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.281e-04, train_time=1.525 +[gpub022:0/16] 2024-02-02 05:48:44,100 (trainer:737) INFO: 20epoch:train:3301-3400batch: iter_time=3.322e-04, forward_time=0.304, loss_ctc=54.303, loss_att=57.955, acc=0.740, loss=56.859, backward_time=0.417, grad_norm=34.439, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.281e-04, train_time=1.378 +[gpub022:0/16] 2024-02-02 05:51:18,750 (trainer:737) INFO: 20epoch:train:3401-3500batch: 
iter_time=1.119e-04, forward_time=0.299, loss_ctc=63.733, loss_att=61.290, acc=0.720, loss=62.022, backward_time=0.411, grad_norm=39.924, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.281e-04, train_time=1.546 +[gpub022:0/16] 2024-02-02 05:53:53,999 (trainer:737) INFO: 20epoch:train:3501-3600batch: iter_time=1.117e-04, forward_time=0.359, loss_ctc=51.450, loss_att=45.665, acc=0.755, loss=47.401, backward_time=0.434, grad_norm=33.463, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.280e-04, train_time=1.552 +[gpub022:0/16] 2024-02-02 05:56:10,631 (trainer:737) INFO: 20epoch:train:3601-3700batch: iter_time=0.002, forward_time=0.316, loss_ctc=52.303, loss_att=52.574, acc=0.723, loss=52.493, backward_time=0.404, grad_norm=38.646, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.280e-04, train_time=1.366 +[gpub022:0/16] 2024-02-02 05:57:41,452 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub022:0/16] 2024-02-02 05:58:00,211 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 05:58:04,048 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 05:58:04,048 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub022:0/16] 2024-02-02 05:58:04,052 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 06:03:36,308 
(trainer:737) INFO: 20epoch:train:3701-3800batch: iter_time=3.045, forward_time=0.297, loss_ctc=46.973, loss_att=47.854, acc=0.751, loss=47.590, backward_time=0.409, grad_norm=30.469, clip=100.000, loss_scale=9.658e+33, optim_step_time=0.094, optim0_lr0=2.279e-04, train_time=4.457 +[gpub022:0/16] 2024-02-02 06:05:57,180 (trainer:737) INFO: 20epoch:train:3801-3900batch: iter_time=8.609e-05, forward_time=0.346, loss_ctc=46.100, loss_att=46.952, acc=0.755, loss=46.696, backward_time=0.423, grad_norm=32.602, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.279e-04, train_time=1.408 +[gpub022:0/16] 2024-02-02 06:06:03,055 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-02 06:08:14,283 (trainer:737) INFO: 20epoch:train:3901-4000batch: iter_time=9.268e-05, forward_time=0.307, loss_ctc=46.352, loss_att=40.836, acc=0.767, loss=42.491, backward_time=0.410, grad_norm=33.265, clip=100.000, loss_scale=5.350e+33, optim_step_time=0.093, optim0_lr0=2.279e-04, train_time=1.370 +[gpub022:0/16] 2024-02-02 06:10:56,541 (trainer:737) INFO: 20epoch:train:4001-4100batch: iter_time=7.729e-04, forward_time=0.312, loss_ctc=48.982, loss_att=49.802, acc=0.744, loss=49.556, backward_time=0.419, grad_norm=33.707, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.278e-04, train_time=1.622 +[gpub022:0/16] 2024-02-02 06:13:11,992 (trainer:737) INFO: 20epoch:train:4101-4200batch: iter_time=9.701e-05, forward_time=0.295, loss_ctc=53.890, loss_att=52.115, acc=0.757, loss=52.648, backward_time=0.414, grad_norm=37.700, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.278e-04, train_time=1.355 +[gpub022:0/16] 2024-02-02 06:15:48,812 (trainer:737) INFO: 20epoch:train:4201-4300batch: iter_time=8.522e-05, forward_time=0.332, loss_ctc=51.588, loss_att=46.403, acc=0.757, loss=47.958, backward_time=0.443, grad_norm=33.283, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, 
optim0_lr0=2.277e-04, train_time=1.568 +[gpub022:0/16] 2024-02-02 06:18:18,907 (trainer:737) INFO: 20epoch:train:4301-4400batch: iter_time=9.118e-05, forward_time=0.314, loss_ctc=51.136, loss_att=50.633, acc=0.750, loss=50.784, backward_time=0.410, grad_norm=31.747, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.277e-04, train_time=1.500 +[gpub022:0/16] 2024-02-02 06:20:39,206 (trainer:737) INFO: 20epoch:train:4401-4500batch: iter_time=2.389e-04, forward_time=0.297, loss_ctc=54.420, loss_att=57.373, acc=0.727, loss=56.487, backward_time=0.411, grad_norm=38.351, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.277e-04, train_time=1.403 +[gpub022:0/16] 2024-02-02 06:23:14,863 (trainer:737) INFO: 20epoch:train:4501-4600batch: iter_time=9.735e-05, forward_time=0.293, loss_ctc=50.334, loss_att=58.751, acc=0.735, loss=56.226, backward_time=0.408, grad_norm=33.699, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.276e-04, train_time=1.557 +[gpub022:0/16] 2024-02-02 06:25:50,501 (trainer:737) INFO: 20epoch:train:4601-4700batch: iter_time=9.006e-05, forward_time=0.339, loss_ctc=52.000, loss_att=53.754, acc=0.742, loss=53.228, backward_time=0.434, grad_norm=33.566, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.276e-04, train_time=1.556 +[gpub022:0/16] 2024-02-02 06:28:08,913 (trainer:737) INFO: 20epoch:train:4701-4800batch: iter_time=9.687e-05, forward_time=0.306, loss_ctc=63.050, loss_att=57.669, acc=0.733, loss=59.284, backward_time=0.413, grad_norm=40.376, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.275e-04, train_time=1.384 +[gpub022:0/16] 2024-02-02 06:30:43,455 (trainer:737) INFO: 20epoch:train:4801-4900batch: iter_time=4.671e-04, forward_time=0.306, loss_ctc=50.069, loss_att=49.114, acc=0.738, loss=49.400, backward_time=0.413, grad_norm=32.099, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.275e-04, train_time=1.545 
+[gpub022:0/16] 2024-02-02 06:33:15,895 (trainer:737) INFO: 20epoch:train:4901-5000batch: iter_time=9.234e-05, forward_time=0.304, loss_ctc=57.411, loss_att=58.213, acc=0.731, loss=57.972, backward_time=0.423, grad_norm=38.633, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.275e-04, train_time=1.525 +[gpub022:0/16] 2024-02-02 06:33:35,950 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub022:0/16] 2024-02-02 06:33:54,979 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 06:33:58,794 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 06:33:58,794 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub022:0/16] 2024-02-02 06:33:58,798 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 06:40:36,069 (trainer:737) INFO: 20epoch:train:5001-5100batch: iter_time=3.002, forward_time=0.335, loss_ctc=42.293, loss_att=45.627, acc=0.734, loss=44.627, backward_time=0.408, grad_norm=31.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.274e-04, train_time=4.401 +[gpub022:0/16] 2024-02-02 06:42:59,139 (trainer:737) INFO: 20epoch:train:5101-5200batch: iter_time=1.030e-04, forward_time=0.289, loss_ctc=42.948, loss_att=42.435, acc=0.750, loss=42.589, backward_time=0.405, grad_norm=31.019, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.094, optim0_lr0=2.274e-04, train_time=1.431 +[gpub022:0/16] 2024-02-02 06:45:53,964 (trainer:737) INFO: 20epoch:train:5201-5300batch: iter_time=9.435e-05, forward_time=0.383, loss_ctc=49.204, loss_att=46.358, acc=0.752, loss=47.212, backward_time=0.433, grad_norm=35.444, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.273e-04, train_time=1.747 +[gpub022:0/16] 2024-02-02 06:46:48,878 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-02 06:48:07,409 (trainer:737) INFO: 20epoch:train:5301-5400batch: iter_time=0.002, forward_time=0.293, loss_ctc=54.818, loss_att=51.827, acc=0.737, loss=52.724, backward_time=0.411, grad_norm=37.801, clip=100.000, loss_scale=3.671e+33, optim_step_time=0.093, optim0_lr0=2.273e-04, train_time=1.335 +[gpub022:0/16] 2024-02-02 06:50:41,885 (trainer:737) INFO: 20epoch:train:5401-5500batch: iter_time=9.991e-05, forward_time=0.343, loss_ctc=50.483, loss_att=49.180, acc=0.759, loss=49.571, backward_time=0.438, grad_norm=33.025, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.273e-04, train_time=1.545 +[gpub022:0/16] 2024-02-02 06:53:22,813 (trainer:737) INFO: 20epoch:train:5501-5600batch: iter_time=9.907e-05, forward_time=0.291, loss_ctc=47.934, loss_att=44.966, acc=0.744, loss=45.857, backward_time=0.406, grad_norm=31.445, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.272e-04, train_time=1.609 +[gpub022:0/16] 2024-02-02 06:55:43,230 (trainer:737) INFO: 20epoch:train:5601-5700batch: iter_time=8.396e-05, forward_time=0.291, loss_ctc=56.370, loss_att=57.211, acc=0.726, loss=56.959, backward_time=0.406, grad_norm=38.372, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.272e-04, train_time=1.404 +[gpub022:0/16] 2024-02-02 06:58:15,066 (trainer:737) INFO: 20epoch:train:5701-5800batch: iter_time=9.339e-05, forward_time=0.403, loss_ctc=48.793, loss_att=57.872, acc=0.724, loss=55.149, 
backward_time=0.424, grad_norm=35.691, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.272e-04, train_time=1.518 +[gpub022:0/16] 2024-02-02 07:01:09,570 (trainer:737) INFO: 20epoch:train:5801-5900batch: iter_time=8.993e-05, forward_time=0.296, loss_ctc=54.328, loss_att=58.278, acc=0.726, loss=57.093, backward_time=0.408, grad_norm=35.465, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.271e-04, train_time=1.745 +[gpub022:0/16] 2024-02-02 07:03:20,604 (trainer:737) INFO: 20epoch:train:5901-6000batch: iter_time=8.665e-05, forward_time=0.294, loss_ctc=62.813, loss_att=59.595, acc=0.714, loss=60.560, backward_time=0.414, grad_norm=41.847, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.271e-04, train_time=1.310 +[gpub022:0/16] 2024-02-02 07:05:54,198 (trainer:737) INFO: 20epoch:train:6001-6100batch: iter_time=1.005e-04, forward_time=0.359, loss_ctc=50.919, loss_att=45.218, acc=0.746, loss=46.928, backward_time=0.459, grad_norm=34.166, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.270e-04, train_time=1.536 +[gpub022:0/16] 2024-02-02 07:08:31,985 (trainer:737) INFO: 20epoch:train:6101-6200batch: iter_time=8.844e-05, forward_time=0.290, loss_ctc=52.138, loss_att=49.990, acc=0.728, loss=50.634, backward_time=0.404, grad_norm=37.222, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.270e-04, train_time=1.578 +[gpub022:0/16] 2024-02-02 07:09:59,923 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub022:0/16] 2024-02-02 07:10:19,021 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 07:10:22,968 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 07:10:22,968 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub022:0/16] 2024-02-02 07:10:22,971 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 07:16:10,263 (trainer:737) INFO: 20epoch:train:6201-6300batch: iter_time=3.131, forward_time=0.370, loss_ctc=46.698, loss_att=44.931, acc=0.749, loss=45.461, backward_time=0.422, grad_norm=29.430, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.270e-04, train_time=4.582 +[gpub022:0/16] 2024-02-02 07:18:51,280 (trainer:737) INFO: 20epoch:train:6301-6400batch: iter_time=9.772e-05, forward_time=0.290, loss_ctc=45.111, loss_att=45.068, acc=0.746, loss=45.081, backward_time=0.403, grad_norm=33.043, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.269e-04, train_time=1.610 +[gpub022:0/16] 2024-02-02 07:21:25,983 (trainer:737) INFO: 20epoch:train:6401-6500batch: iter_time=9.415e-05, forward_time=0.375, loss_ctc=45.620, loss_att=39.439, acc=0.768, loss=41.293, backward_time=0.428, grad_norm=30.806, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=2.269e-04, train_time=1.547 +[gpub022:0/16] 2024-02-02 07:23:56,398 (trainer:737) INFO: 
20epoch:train:6501-6600batch: iter_time=9.089e-05, forward_time=0.292, loss_ctc=48.786, loss_att=50.346, acc=0.732, loss=49.878, backward_time=0.406, grad_norm=37.115, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.268e-04, train_time=1.504 +[gpub022:0/16] 2024-02-02 07:26:21,796 (trainer:737) INFO: 20epoch:train:6601-6700batch: iter_time=9.253e-05, forward_time=0.293, loss_ctc=53.387, loss_att=51.988, acc=0.749, loss=52.408, backward_time=0.407, grad_norm=44.189, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.268e-04, train_time=1.454 +[gpub022:0/16] 2024-02-02 07:28:53,938 (trainer:737) INFO: 20epoch:train:6701-6800batch: iter_time=9.299e-05, forward_time=0.341, loss_ctc=50.959, loss_att=44.483, acc=0.757, loss=46.426, backward_time=0.450, grad_norm=33.292, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.268e-04, train_time=1.521 +[gpub022:0/16] 2024-02-02 07:31:38,541 (trainer:737) INFO: 20epoch:train:6801-6900batch: iter_time=1.067e-04, forward_time=0.293, loss_ctc=50.597, loss_att=50.738, acc=0.739, loss=50.696, backward_time=0.406, grad_norm=32.732, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.267e-04, train_time=1.646 +[gpub022:0/16] 2024-02-02 07:33:48,520 (trainer:737) INFO: 20epoch:train:6901-7000batch: iter_time=9.288e-05, forward_time=0.291, loss_ctc=53.457, loss_att=56.937, acc=0.718, loss=55.893, backward_time=0.405, grad_norm=38.770, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.267e-04, train_time=1.300 +[gpub022:0/16] 2024-02-02 07:36:18,388 (trainer:737) INFO: 20epoch:train:7001-7100batch: iter_time=9.905e-05, forward_time=0.328, loss_ctc=49.553, loss_att=57.661, acc=0.721, loss=55.229, backward_time=0.453, grad_norm=34.438, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.266e-04, train_time=1.498 +[gpub022:0/16] 2024-02-02 07:39:02,629 (trainer:737) INFO: 20epoch:train:7101-7200batch: 
iter_time=9.348e-05, forward_time=0.292, loss_ctc=51.461, loss_att=51.083, acc=0.741, loss=51.197, backward_time=0.406, grad_norm=31.558, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.266e-04, train_time=1.642 +[gpub022:0/16] 2024-02-02 07:41:13,132 (trainer:737) INFO: 20epoch:train:7201-7300batch: iter_time=9.812e-05, forward_time=0.292, loss_ctc=61.001, loss_att=57.132, acc=0.725, loss=58.292, backward_time=0.405, grad_norm=39.089, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.266e-04, train_time=1.305 +[gpub022:0/16] 2024-02-02 07:43:53,188 (trainer:737) INFO: 20epoch:train:7301-7400batch: iter_time=1.038e-04, forward_time=0.377, loss_ctc=50.244, loss_att=47.907, acc=0.742, loss=48.608, backward_time=0.445, grad_norm=33.344, clip=100.000, loss_scale=4.102e+33, optim_step_time=0.101, optim0_lr0=2.265e-04, train_time=1.600 +[gpub022:0/16] 2024-02-02 07:46:35,636 (trainer:737) INFO: 20epoch:train:7401-7500batch: iter_time=8.906e-05, forward_time=0.293, loss_ctc=57.152, loss_att=54.377, acc=0.734, loss=55.210, backward_time=0.407, grad_norm=38.259, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.265e-04, train_time=1.624 +[gpub022:0/16] 2024-02-02 07:46:55,688 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub022:0/16] 2024-02-02 07:47:14,938 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 07:47:18,954 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 07:47:18,954 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub022:0/16] 2024-02-02 07:47:18,958 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 07:54:16,581 (trainer:737) INFO: 20epoch:train:7501-7600batch: iter_time=2.959, forward_time=0.402, loss_ctc=42.064, loss_att=42.673, acc=0.742, loss=42.490, backward_time=0.418, grad_norm=30.330, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.265e-04, train_time=4.609 +[gpub022:0/16] 2024-02-02 07:57:03,550 (trainer:737) INFO: 20epoch:train:7601-7700batch: iter_time=8.892e-05, forward_time=0.309, loss_ctc=42.370, loss_att=40.688, acc=0.757, loss=41.193, backward_time=0.435, grad_norm=32.242, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.264e-04, train_time=1.669 +[gpub022:0/16] 2024-02-02 07:59:33,377 (trainer:737) INFO: 20epoch:train:7701-7800batch: iter_time=8.193e-05, forward_time=0.291, loss_ctc=48.824, loss_att=45.383, acc=0.754, loss=46.415, backward_time=0.405, grad_norm=34.832, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.264e-04, train_time=1.499 +[gpub022:0/16] 2024-02-02 08:02:18,337 (trainer:737) 
INFO: 20epoch:train:7801-7900batch: iter_time=9.057e-05, forward_time=0.416, loss_ctc=54.281, loss_att=50.687, acc=0.742, loss=51.765, backward_time=0.427, grad_norm=37.933, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.263e-04, train_time=1.648 +[gpub022:0/16] 2024-02-02 08:04:53,655 (trainer:737) INFO: 20epoch:train:7901-8000batch: iter_time=8.993e-05, forward_time=0.291, loss_ctc=50.301, loss_att=48.700, acc=0.761, loss=49.180, backward_time=0.403, grad_norm=33.842, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.263e-04, train_time=1.554 +[gpub022:0/16] 2024-02-02 08:07:22,316 (trainer:737) INFO: 20epoch:train:8001-8100batch: iter_time=0.001, forward_time=0.421, loss_ctc=47.734, loss_att=43.866, acc=0.748, loss=45.026, backward_time=0.423, grad_norm=31.957, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=2.263e-04, train_time=1.484 +[gpub022:0/16] 2024-02-02 08:10:07,511 (trainer:737) INFO: 20epoch:train:8101-8200batch: iter_time=1.019e-04, forward_time=0.292, loss_ctc=56.282, loss_att=56.428, acc=0.728, loss=56.384, backward_time=0.406, grad_norm=39.934, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.262e-04, train_time=1.654 +[gpub022:0/16] 2024-02-02 08:12:53,822 (trainer:737) INFO: 20epoch:train:8201-8300batch: iter_time=3.385e-04, forward_time=0.382, loss_ctc=48.546, loss_att=56.184, acc=0.726, loss=53.893, backward_time=0.437, grad_norm=35.762, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.262e-04, train_time=1.663 +[gpub022:0/16] 2024-02-02 08:15:04,415 (trainer:737) INFO: 20epoch:train:8301-8400batch: iter_time=1.003e-04, forward_time=0.292, loss_ctc=53.709, loss_att=56.846, acc=0.728, loss=55.905, backward_time=0.408, grad_norm=34.931, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.261e-04, train_time=1.304 +[gpub022:0/16] 2024-02-02 08:17:55,306 (trainer:737) INFO: 20epoch:train:8401-8500batch: 
iter_time=1.018e-04, forward_time=0.331, loss_ctc=62.523, loss_att=58.696, acc=0.721, loss=59.845, backward_time=0.416, grad_norm=40.505, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.261e-04, train_time=1.710 +[gpub022:0/16] 2024-02-02 08:20:22,011 (trainer:737) INFO: 20epoch:train:8501-8600batch: iter_time=1.099e-04, forward_time=0.352, loss_ctc=50.990, loss_att=45.723, acc=0.743, loss=47.303, backward_time=0.419, grad_norm=33.393, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.261e-04, train_time=1.466 +[gpub022:0/16] 2024-02-02 08:22:47,509 (trainer:737) INFO: 20epoch:train:8601-8700batch: iter_time=3.835e-04, forward_time=0.292, loss_ctc=50.806, loss_att=50.685, acc=0.725, loss=50.722, backward_time=0.413, grad_norm=37.271, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.260e-04, train_time=1.456 +[gpub022:0/16] 2024-02-02 08:24:26,690 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub022:0/16] 2024-02-02 08:24:45,466 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 08:24:48,921 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 08:24:48,921 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub022:0/16] 2024-02-02 08:24:48,925 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 08:30:22,956 
(trainer:737) INFO: 20epoch:train:8701-8800batch: iter_time=2.959, forward_time=0.388, loss_ctc=46.715, loss_att=44.979, acc=0.758, loss=45.500, backward_time=0.436, grad_norm=30.531, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.260e-04, train_time=4.554 +[gpub022:0/16] 2024-02-02 08:32:37,388 (trainer:737) INFO: 20epoch:train:8801-8900batch: iter_time=2.015e-04, forward_time=0.305, loss_ctc=45.045, loss_att=48.270, acc=0.753, loss=47.302, backward_time=0.412, grad_norm=33.454, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.259e-04, train_time=1.344 +[gpub022:0/16] 2024-02-02 08:35:07,531 (trainer:737) INFO: 20epoch:train:8901-9000batch: iter_time=8.673e-05, forward_time=0.394, loss_ctc=45.635, loss_att=40.960, acc=0.767, loss=42.362, backward_time=0.422, grad_norm=31.277, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.259e-04, train_time=1.501 +[gpub022:0/16] 2024-02-02 08:37:31,006 (trainer:737) INFO: 20epoch:train:9001-9100batch: iter_time=9.016e-05, forward_time=0.292, loss_ctc=48.655, loss_att=50.928, acc=0.740, loss=50.246, backward_time=0.406, grad_norm=34.955, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.259e-04, train_time=1.435 +[gpub022:0/16] 2024-02-02 08:40:13,269 (trainer:737) INFO: 20epoch:train:9101-9200batch: iter_time=1.008e-04, forward_time=0.400, loss_ctc=53.045, loss_att=52.706, acc=0.759, loss=52.808, backward_time=0.443, grad_norm=38.092, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.258e-04, train_time=1.622 +[gpub022:0/16] 2024-02-02 08:42:25,182 (trainer:737) INFO: 20epoch:train:9201-9300batch: iter_time=1.042e-04, forward_time=0.291, loss_ctc=50.783, loss_att=46.089, acc=0.761, loss=47.497, backward_time=0.405, grad_norm=34.326, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.258e-04, train_time=1.319 +[gpub022:0/16] 2024-02-02 08:44:56,494 (trainer:737) INFO: 
20epoch:train:9301-9400batch: iter_time=1.032e-04, forward_time=0.294, loss_ctc=50.119, loss_att=50.220, acc=0.752, loss=50.190, backward_time=0.406, grad_norm=32.856, clip=100.000, loss_scale=8.204e+33, optim_step_time=0.093, optim0_lr0=2.258e-04, train_time=1.513 +[gpub022:0/16] 2024-02-02 08:47:41,187 (trainer:737) INFO: 20epoch:train:9401-9500batch: iter_time=9.544e-05, forward_time=0.357, loss_ctc=52.943, loss_att=57.287, acc=0.728, loss=55.984, backward_time=0.465, grad_norm=37.465, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.257e-04, train_time=1.647 +[gpub022:0/16] 2024-02-02 08:50:10,382 (trainer:737) INFO: 20epoch:train:9501-9600batch: iter_time=1.046e-04, forward_time=0.295, loss_ctc=49.731, loss_att=58.604, acc=0.737, loss=55.942, backward_time=0.408, grad_norm=34.356, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.257e-04, train_time=1.491 +[gpub022:0/16] 2024-02-02 08:52:21,505 (trainer:737) INFO: 20epoch:train:9601-9700batch: iter_time=9.536e-05, forward_time=0.298, loss_ctc=51.354, loss_att=54.062, acc=0.746, loss=53.249, backward_time=0.410, grad_norm=32.463, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.256e-04, train_time=1.312 +[gpub022:0/16] 2024-02-02 08:54:49,257 (trainer:737) INFO: 20epoch:train:9701-9800batch: iter_time=5.250e-04, forward_time=0.394, loss_ctc=62.012, loss_att=57.087, acc=0.737, loss=58.565, backward_time=0.445, grad_norm=41.704, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=2.256e-04, train_time=1.477 +[gpub022:0/16] 2024-02-02 08:57:56,305 (trainer:737) INFO: 20epoch:train:9801-9900batch: iter_time=9.845e-05, forward_time=0.310, loss_ctc=49.805, loss_att=49.403, acc=0.739, loss=49.524, backward_time=0.411, grad_norm=34.074, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.256e-04, train_time=1.871 +[gpub022:0/16] 2024-02-02 09:00:21,416 (trainer:737) INFO: 20epoch:train:9901-10000batch: 
iter_time=8.600e-05, forward_time=0.368, loss_ctc=55.911, loss_att=57.605, acc=0.733, loss=57.097, backward_time=0.457, grad_norm=37.913, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.255e-04, train_time=1.450 +[gpub022:0/16] 2024-02-02 09:00:41,454 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub022:0/16] 2024-02-02 09:01:00,312 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 09:01:03,900 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 09:01:03,900 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub022:0/16] 2024-02-02 09:01:03,983 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 09:08:44,246 (trainer:737) INFO: 20epoch:train:10001-10100batch: iter_time=3.231, forward_time=0.389, loss_ctc=42.385, loss_att=44.180, acc=0.738, loss=43.642, backward_time=0.415, grad_norm=31.588, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.255e-04, train_time=5.029 +[gpub022:0/16] 2024-02-02 09:11:12,520 (trainer:737) INFO: 20epoch:train:10101-10200batch: iter_time=8.560e-05, forward_time=0.388, loss_ctc=42.633, loss_att=41.289, acc=0.756, loss=41.692, backward_time=0.434, grad_norm=31.823, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=2.255e-04, train_time=1.482 +[gpub022:0/16] 2024-02-02 09:13:38,663 
(trainer:737) INFO: 20epoch:train:10201-10300batch: iter_time=7.971e-05, forward_time=0.363, loss_ctc=48.453, loss_att=45.630, acc=0.753, loss=46.477, backward_time=0.479, grad_norm=35.151, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.254e-04, train_time=1.461 +[gpub022:0/16] 2024-02-02 09:16:34,972 (trainer:737) INFO: 20epoch:train:10301-10400batch: iter_time=9.384e-05, forward_time=0.292, loss_ctc=54.139, loss_att=50.358, acc=0.743, loss=51.492, backward_time=0.405, grad_norm=40.578, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.254e-04, train_time=1.763 +[gpub022:0/16] 2024-02-02 09:19:03,469 (trainer:737) INFO: 20epoch:train:10401-10500batch: iter_time=1.088e-04, forward_time=0.390, loss_ctc=49.939, loss_att=48.489, acc=0.761, loss=48.924, backward_time=0.446, grad_norm=35.421, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.253e-04, train_time=1.484 +[gpub022:0/16] 2024-02-02 09:21:30,610 (trainer:737) INFO: 20epoch:train:10501-10600batch: iter_time=9.487e-05, forward_time=0.291, loss_ctc=47.622, loss_att=43.920, acc=0.748, loss=45.031, backward_time=0.404, grad_norm=30.063, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.253e-04, train_time=1.472 +[gpub022:0/16] 2024-02-02 09:24:14,906 (trainer:737) INFO: 20epoch:train:10601-10700batch: iter_time=1.003e-04, forward_time=0.299, loss_ctc=56.036, loss_att=56.550, acc=0.730, loss=56.395, backward_time=0.407, grad_norm=38.860, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.253e-04, train_time=1.643 +[gpub022:0/16] 2024-02-02 09:26:44,324 (trainer:737) INFO: 20epoch:train:10701-10800batch: iter_time=3.871e-04, forward_time=0.396, loss_ctc=48.692, loss_att=56.596, acc=0.727, loss=54.225, backward_time=0.467, grad_norm=127.232, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.252e-04, train_time=1.493 +[gpub022:0/16] 2024-02-02 09:29:13,398 (trainer:737) INFO: 
20epoch:train:10801-10900batch: iter_time=1.017e-04, forward_time=0.295, loss_ctc=53.866, loss_att=57.154, acc=0.730, loss=56.168, backward_time=0.408, grad_norm=34.847, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.252e-04, train_time=1.491 +[gpub022:0/16] 2024-02-02 09:32:17,250 (trainer:737) INFO: 20epoch:train:10901-11000batch: iter_time=2.214e-04, forward_time=0.370, loss_ctc=62.614, loss_att=58.522, acc=0.718, loss=59.749, backward_time=0.459, grad_norm=40.601, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.251e-04, train_time=1.837 +[gpub022:0/16] 2024-02-02 09:34:51,304 (trainer:737) INFO: 20epoch:train:11001-11100batch: iter_time=1.026e-04, forward_time=0.291, loss_ctc=50.116, loss_att=45.907, acc=0.744, loss=47.170, backward_time=0.405, grad_norm=34.330, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.251e-04, train_time=1.542 +[gpub022:0/16] 2024-02-02 09:37:47,654 (trainer:737) INFO: 20epoch:train:11101-11200batch: iter_time=9.931e-05, forward_time=0.323, loss_ctc=50.175, loss_att=50.845, acc=0.726, loss=50.644, backward_time=0.433, grad_norm=37.843, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.112, optim0_lr0=2.251e-04, train_time=1.762 +[gpub022:0/16] 2024-02-02 09:39:24,255 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub022:0/16] 2024-02-02 09:39:43,235 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 09:39:46,752 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 09:39:46,752 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub022:0/16] 2024-02-02 09:39:46,833 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 09:45:12,702 (trainer:737) INFO: 20epoch:train:11201-11300batch: iter_time=2.850, forward_time=0.363, loss_ctc=46.183, loss_att=44.282, acc=0.753, loss=44.853, backward_time=0.418, grad_norm=30.190, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.250e-04, train_time=4.450 +[gpub022:0/16] 2024-02-02 09:47:48,706 (trainer:737) INFO: 20epoch:train:11301-11400batch: iter_time=8.781e-05, forward_time=0.292, loss_ctc=45.580, loss_att=45.019, acc=0.748, loss=45.187, backward_time=0.404, grad_norm=31.685, clip=100.000, loss_scale=1.641e+34, optim_step_time=0.093, optim0_lr0=2.250e-04, train_time=1.560 +[gpub022:0/16] 2024-02-02 09:48:15,605 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-02 09:49:59,317 (trainer:737) INFO: 20epoch:train:11401-11500batch: iter_time=8.470e-05, forward_time=0.299, loss_ctc=45.005, loss_att=39.095, acc=0.770, loss=40.868, backward_time=0.414, grad_norm=30.343, clip=100.000, loss_scale=1.248e+34, optim_step_time=0.094, optim0_lr0=2.250e-04, train_time=1.306 +[gpub022:0/16] 2024-02-02 09:52:49,151 (trainer:737) INFO: 20epoch:train:11501-11600batch: iter_time=9.591e-05, forward_time=0.339, loss_ctc=48.070, loss_att=49.266, acc=0.736, loss=48.907, backward_time=0.423, grad_norm=33.852, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.249e-04, train_time=1.696 +[gpub022:0/16] 2024-02-02 09:55:16,259 (trainer:737) INFO: 20epoch:train:11601-11700batch: iter_time=1.018e-04, forward_time=0.293, loss_ctc=53.138, loss_att=51.402, acc=0.751, loss=51.923, backward_time=0.406, grad_norm=36.416, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.249e-04, train_time=1.473 +[gpub022:0/16] 2024-02-02 09:58:07,592 (trainer:737) INFO: 20epoch:train:11701-11800batch: iter_time=9.239e-05, forward_time=0.310, loss_ctc=50.720, loss_att=44.261, acc=0.758, loss=46.199, backward_time=0.409, grad_norm=32.903, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.248e-04, train_time=1.713 +[gpub022:0/16] 2024-02-02 10:02:02,678 (trainer:737) INFO: 20epoch:train:11801-11900batch: iter_time=0.105, forward_time=0.330, loss_ctc=49.806, loss_att=50.048, acc=0.743, loss=49.975, backward_time=0.457, grad_norm=33.320, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.248e-04, train_time=2.351 +[gpub022:0/16] 2024-02-02 10:04:25,631 (trainer:737) INFO: 20epoch:train:11901-12000batch: iter_time=8.550e-05, forward_time=0.291, loss_ctc=53.639, loss_att=57.454, acc=0.720, loss=56.310, backward_time=0.404, grad_norm=37.173, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.248e-04, train_time=1.427 +[gpub022:0/16] 2024-02-02 
10:09:04,714 (trainer:737) INFO: 20epoch:train:12001-12100batch: iter_time=9.028e-05, forward_time=0.359, loss_ctc=49.014, loss_att=57.355, acc=0.722, loss=54.853, backward_time=0.419, grad_norm=34.794, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.247e-04, train_time=2.793 +[gpub022:0/16] 2024-02-02 10:11:35,007 (trainer:737) INFO: 20epoch:train:12101-12200batch: iter_time=8.672e-05, forward_time=0.407, loss_ctc=51.320, loss_att=51.114, acc=0.742, loss=51.176, backward_time=0.429, grad_norm=32.132, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.247e-04, train_time=1.503 +[gpub022:0/16] 2024-02-02 10:14:10,676 (trainer:737) INFO: 20epoch:train:12201-12300batch: iter_time=9.042e-05, forward_time=0.292, loss_ctc=61.055, loss_att=56.677, acc=0.726, loss=57.990, backward_time=0.405, grad_norm=41.272, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.247e-04, train_time=1.556 +[gpub022:0/16] 2024-02-02 10:17:37,398 (trainer:737) INFO: 20epoch:train:12301-12400batch: iter_time=0.069, forward_time=0.534, loss_ctc=49.377, loss_att=47.015, acc=0.745, loss=47.724, backward_time=0.545, grad_norm=32.118, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.246e-04, train_time=2.066 +[gpub022:0/16] 2024-02-02 10:20:50,950 (trainer:737) INFO: 20epoch:train:12401-12500batch: iter_time=9.479e-05, forward_time=0.345, loss_ctc=55.405, loss_att=53.252, acc=0.739, loss=53.898, backward_time=0.504, grad_norm=38.528, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=2.246e-04, train_time=1.936 +[gpub022:0/16] 2024-02-02 10:21:11,042 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub022:0/16] 2024-02-02 10:21:29,719 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 10:21:33,177 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 10:21:33,177 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub022:0/16] 2024-02-02 10:21:33,280 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 10:29:34,233 (trainer:737) INFO: 20epoch:train:12501-12600batch: iter_time=3.629, forward_time=0.287, loss_ctc=41.910, loss_att=44.770, acc=0.752, loss=43.912, backward_time=0.399, grad_norm=31.083, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.245e-04, train_time=5.233 +[gpub022:0/16] 2024-02-02 10:32:44,576 (trainer:737) INFO: 20epoch:train:12601-12700batch: iter_time=8.707e-05, forward_time=0.292, loss_ctc=42.161, loss_att=42.545, acc=0.756, loss=42.430, backward_time=0.401, grad_norm=31.215, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.245e-04, train_time=1.903 +[gpub022:0/16] 2024-02-02 10:35:14,258 (trainer:737) INFO: 20epoch:train:12701-12800batch: iter_time=9.321e-05, forward_time=0.398, loss_ctc=48.239, loss_att=45.576, acc=0.763, loss=46.375, backward_time=0.440, grad_norm=34.525, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.245e-04, train_time=1.497 +[gpub022:0/16] 2024-02-02 10:38:06,146 (trainer:737) 
INFO: 20epoch:train:12801-12900batch: iter_time=1.084e-04, forward_time=0.292, loss_ctc=54.137, loss_att=50.794, acc=0.756, loss=51.797, backward_time=0.406, grad_norm=38.412, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.244e-04, train_time=1.719 +[gpub022:0/16] 2024-02-02 10:38:21,461 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-02 10:40:21,998 (trainer:737) INFO: 20epoch:train:12901-13000batch: iter_time=1.001e-04, forward_time=0.315, loss_ctc=49.315, loss_att=49.921, acc=0.765, loss=49.740, backward_time=0.408, grad_norm=32.654, clip=100.000, loss_scale=5.664e+33, optim_step_time=0.093, optim0_lr0=2.244e-04, train_time=1.358 +[gpub022:0/16] 2024-02-02 10:43:11,820 (trainer:737) INFO: 20epoch:train:13001-13100batch: iter_time=1.019e-04, forward_time=0.344, loss_ctc=46.788, loss_att=44.607, acc=0.753, loss=45.261, backward_time=0.494, grad_norm=31.200, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.244e-04, train_time=1.697 +[gpub022:0/16] 2024-02-02 10:45:50,263 (trainer:737) INFO: 20epoch:train:13101-13200batch: iter_time=1.076e-04, forward_time=0.291, loss_ctc=54.980, loss_att=56.180, acc=0.738, loss=55.820, backward_time=0.405, grad_norm=36.763, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.243e-04, train_time=1.585 +[gpub022:0/16] 2024-02-02 10:48:36,173 (trainer:737) INFO: 20epoch:train:13201-13300batch: iter_time=9.931e-05, forward_time=0.294, loss_ctc=47.683, loss_att=56.650, acc=0.740, loss=53.960, backward_time=0.429, grad_norm=34.830, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.243e-04, train_time=1.659 +[gpub022:0/16] 2024-02-02 10:51:17,336 (trainer:737) INFO: 20epoch:train:13301-13400batch: iter_time=9.925e-05, forward_time=0.397, loss_ctc=53.266, loss_att=58.284, acc=0.739, loss=56.779, backward_time=0.454, grad_norm=34.074, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, 
optim0_lr0=2.242e-04, train_time=1.611 +[gpub022:0/16] 2024-02-02 10:54:02,646 (trainer:737) INFO: 20epoch:train:13401-13500batch: iter_time=1.022e-04, forward_time=0.292, loss_ctc=60.797, loss_att=60.918, acc=0.724, loss=60.882, backward_time=0.405, grad_norm=39.319, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.242e-04, train_time=1.653 +[gpub022:0/16] 2024-02-02 10:56:37,887 (trainer:737) INFO: 20epoch:train:13501-13600batch: iter_time=9.847e-05, forward_time=0.296, loss_ctc=50.232, loss_att=45.582, acc=0.756, loss=46.977, backward_time=0.417, grad_norm=33.808, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.242e-04, train_time=1.552 +[gpub022:0/16] 2024-02-02 10:59:43,657 (trainer:737) INFO: 20epoch:train:13601-13700batch: iter_time=1.035e-04, forward_time=0.377, loss_ctc=50.204, loss_att=50.841, acc=0.731, loss=50.650, backward_time=0.435, grad_norm=36.335, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.241e-04, train_time=1.857 +[gpub022:0/16] 2024-02-02 11:01:30,580 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub022:0/16] 2024-02-02 11:01:49,609 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 11:01:53,222 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 11:01:53,222 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub022:0/16] 2024-02-02 11:01:53,312 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 11:08:25,496 (trainer:737) INFO: 20epoch:train:13701-13800batch: iter_time=3.487, forward_time=0.327, loss_ctc=45.696, loss_att=47.476, acc=0.753, loss=46.942, backward_time=0.408, grad_norm=30.825, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.241e-04, train_time=5.218 +[gpub022:0/16] 2024-02-02 11:11:30,373 (trainer:737) INFO: 20epoch:train:13801-13900batch: iter_time=9.173e-05, forward_time=0.296, loss_ctc=44.992, loss_att=46.236, acc=0.759, loss=45.863, backward_time=0.406, grad_norm=31.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.241e-04, train_time=1.849 +[gpub022:0/16] 2024-02-02 11:14:12,113 (trainer:737) INFO: 20epoch:train:13901-14000batch: iter_time=8.634e-05, forward_time=0.360, loss_ctc=45.398, loss_att=40.280, acc=0.769, loss=41.815, backward_time=0.456, grad_norm=31.517, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.240e-04, train_time=1.617 +[gpub022:0/16] 2024-02-02 11:17:20,687 (trainer:737) 
INFO: 20epoch:train:14001-14100batch: iter_time=8.345e-05, forward_time=0.298, loss_ctc=47.915, loss_att=49.626, acc=0.743, loss=49.113, backward_time=0.415, grad_norm=33.313, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.240e-04, train_time=1.885 +[gpub022:0/16] 2024-02-02 11:20:06,725 (trainer:737) INFO: 20epoch:train:14101-14200batch: iter_time=8.924e-05, forward_time=0.406, loss_ctc=53.356, loss_att=52.013, acc=0.761, loss=52.416, backward_time=0.425, grad_norm=37.698, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.239e-04, train_time=1.661 +[gpub022:0/16] 2024-02-02 11:22:56,380 (trainer:737) INFO: 20epoch:train:14201-14300batch: iter_time=9.373e-05, forward_time=0.292, loss_ctc=50.069, loss_att=45.495, acc=0.762, loss=46.867, backward_time=0.411, grad_norm=31.763, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.239e-04, train_time=1.696 +[gpub022:0/16] 2024-02-02 11:25:41,087 (trainer:737) INFO: 20epoch:train:14301-14400batch: iter_time=8.940e-05, forward_time=0.300, loss_ctc=49.619, loss_att=50.291, acc=0.753, loss=50.089, backward_time=0.410, grad_norm=32.475, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.239e-04, train_time=1.647 +[gpub022:0/16] 2024-02-02 11:29:10,241 (trainer:737) INFO: 20epoch:train:14401-14500batch: iter_time=9.119e-05, forward_time=0.365, loss_ctc=52.750, loss_att=56.281, acc=0.730, loss=55.222, backward_time=0.423, grad_norm=37.607, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.238e-04, train_time=2.091 +[gpub022:0/16] 2024-02-02 11:31:47,289 (trainer:737) INFO: 20epoch:train:14501-14600batch: iter_time=1.162e-04, forward_time=0.346, loss_ctc=49.310, loss_att=58.031, acc=0.739, loss=55.415, backward_time=0.415, grad_norm=33.336, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.238e-04, train_time=1.570 +[gpub022:0/16] 2024-02-02 11:34:23,549 (trainer:737) INFO: 
20epoch:train:14601-14700batch: iter_time=9.032e-05, forward_time=0.293, loss_ctc=51.029, loss_att=53.394, acc=0.749, loss=52.684, backward_time=0.408, grad_norm=33.112, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.238e-04, train_time=1.563 +[gpub022:0/16] 2024-02-02 11:37:46,094 (trainer:737) INFO: 20epoch:train:14701-14800batch: iter_time=9.842e-05, forward_time=0.382, loss_ctc=60.364, loss_att=56.434, acc=0.740, loss=57.613, backward_time=0.430, grad_norm=38.008, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=2.237e-04, train_time=2.024 +[gpub022:0/16] 2024-02-02 11:39:58,552 (trainer:737) INFO: 20epoch:train:14801-14900batch: iter_time=9.689e-05, forward_time=0.290, loss_ctc=48.927, loss_att=48.279, acc=0.741, loss=48.474, backward_time=0.405, grad_norm=32.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.237e-04, train_time=1.325 +[gpub022:0/16] 2024-02-02 11:42:57,968 (trainer:737) INFO: 20epoch:train:14901-15000batch: iter_time=9.887e-04, forward_time=0.412, loss_ctc=55.495, loss_att=57.489, acc=0.733, loss=56.891, backward_time=0.426, grad_norm=38.140, clip=100.000, loss_scale=9.865e+33, optim_step_time=0.113, optim0_lr0=2.236e-04, train_time=1.793 +[gpub022:0/16] 2024-02-02 12:20:24,564 (trainer:343) INFO: 20epoch results: [train] iter_time=0.248, forward_time=0.329, loss_ctc=51.246, loss_att=50.799, acc=0.742, loss=50.933, backward_time=0.422, grad_norm=35.688, clip=100.000, loss_scale=5.935e+33, optim_step_time=0.096, optim0_lr0=2.265e-04, train_time=1.818, time=7 hours, 34 minutes and 54.73 seconds, total_count=330000, gpu_max_cached_mem_GB=41.891, [valid] loss_ctc=42.053, cer_ctc=0.209, loss_att=44.871, acc=0.648, cer=0.343, wer=0.999, loss=44.025, time=37 minutes and 2.53 seconds, total_count=102762, gpu_max_cached_mem_GB=41.891 +[gpub022:0/16] 2024-02-02 12:20:34,267 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub022:0/16] 2024-02-02 
12:20:34,364 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/15epoch.pth +[gpub022:0/16] 2024-02-02 12:20:34,364 (trainer:272) INFO: 21/45epoch started. Estimated time to finish: 1 week, 1 day and 19 hours +[gpub022:0/16] 2024-02-02 12:20:34,373 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub022:0/16] 2024-02-02 12:20:52,587 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 12:20:55,964 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 12:20:55,965 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub022:0/16] 2024-02-02 12:20:55,968 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 12:28:10,496 (trainer:737) INFO: 21epoch:train:1-100batch: iter_time=3.149, forward_time=0.354, loss_ctc=51.560, loss_att=47.868, acc=0.727, loss=48.976, backward_time=0.427, grad_norm=36.570, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.236e-04, train_time=4.561 +[gpub022:0/16] 2024-02-02 12:30:31,149 (trainer:737) INFO: 21epoch:train:101-200batch: iter_time=9.614e-05, forward_time=0.299, loss_ctc=51.741, loss_att=51.389, acc=0.729, loss=51.495, backward_time=0.405, grad_norm=39.886, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.236e-04, 
train_time=1.405 +[gpub022:0/16] 2024-02-02 12:33:09,734 (trainer:737) INFO: 21epoch:train:201-300batch: iter_time=1.041e-04, forward_time=0.323, loss_ctc=49.229, loss_att=48.274, acc=0.736, loss=48.560, backward_time=0.451, grad_norm=32.547, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.235e-04, train_time=1.586 +[gpub022:0/16] 2024-02-02 12:35:38,070 (trainer:737) INFO: 21epoch:train:301-400batch: iter_time=1.093e-04, forward_time=0.301, loss_ctc=52.231, loss_att=50.810, acc=0.754, loss=51.237, backward_time=0.409, grad_norm=35.174, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.235e-04, train_time=1.484 +[gpub022:0/16] 2024-02-02 12:38:10,921 (trainer:737) INFO: 21epoch:train:401-500batch: iter_time=1.077e-04, forward_time=0.310, loss_ctc=40.017, loss_att=37.275, acc=0.764, loss=38.098, backward_time=0.415, grad_norm=29.238, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.235e-04, train_time=1.528 +[gpub022:0/16] 2024-02-02 12:40:41,843 (trainer:737) INFO: 21epoch:train:501-600batch: iter_time=2.950e-04, forward_time=0.297, loss_ctc=43.078, loss_att=41.332, acc=0.750, loss=41.856, backward_time=0.405, grad_norm=31.591, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.234e-04, train_time=1.509 +[gpub022:0/16] 2024-02-02 12:43:19,281 (trainer:737) INFO: 21epoch:train:601-700batch: iter_time=1.014e-04, forward_time=0.340, loss_ctc=53.639, loss_att=50.162, acc=0.747, loss=51.205, backward_time=0.425, grad_norm=41.818, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.234e-04, train_time=1.574 +[gpub022:0/16] 2024-02-02 12:45:48,756 (trainer:737) INFO: 21epoch:train:701-800batch: iter_time=1.120e-04, forward_time=0.317, loss_ctc=50.383, loss_att=44.596, acc=0.738, loss=46.332, backward_time=0.437, grad_norm=38.133, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.233e-04, train_time=1.495 +[gpub022:0/16] 2024-02-02 
12:48:13,689 (trainer:737) INFO: 21epoch:train:801-900batch: iter_time=9.832e-05, forward_time=0.306, loss_ctc=54.606, loss_att=54.311, acc=0.705, loss=54.400, backward_time=0.421, grad_norm=37.141, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.233e-04, train_time=1.449 +[gpub022:0/16] 2024-02-02 12:50:53,543 (trainer:737) INFO: 21epoch:train:901-1000batch: iter_time=1.008e-04, forward_time=0.385, loss_ctc=50.359, loss_att=55.815, acc=0.736, loss=54.178, backward_time=0.415, grad_norm=34.068, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.233e-04, train_time=1.598 +[gpub022:0/16] 2024-02-02 12:53:21,976 (trainer:737) INFO: 21epoch:train:1001-1100batch: iter_time=1.099e-04, forward_time=0.291, loss_ctc=50.225, loss_att=57.285, acc=0.728, loss=55.167, backward_time=0.405, grad_norm=35.525, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.232e-04, train_time=1.484 +[gpub022:0/16] 2024-02-02 12:55:58,303 (trainer:737) INFO: 21epoch:train:1101-1200batch: iter_time=1.052e-04, forward_time=0.340, loss_ctc=50.026, loss_att=51.533, acc=0.754, loss=51.081, backward_time=0.422, grad_norm=33.056, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.232e-04, train_time=1.563 +[gpub022:0/16] 2024-02-02 12:57:28,081 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub022:0/16] 2024-02-02 12:57:47,091 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 12:57:50,661 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 12:57:50,661 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub022:0/16] 2024-02-02 12:57:50,665 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 13:03:32,349 (trainer:737) INFO: 21epoch:train:1201-1300batch: iter_time=3.060, forward_time=0.342, loss_ctc=48.879, loss_att=45.992, acc=0.721, loss=46.858, backward_time=0.409, grad_norm=34.288, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.232e-04, train_time=4.540 +[gpub022:0/16] 2024-02-02 13:06:07,129 (trainer:737) INFO: 21epoch:train:1301-1400batch: iter_time=9.337e-05, forward_time=0.290, loss_ctc=49.816, loss_att=43.476, acc=0.769, loss=45.378, backward_time=0.403, grad_norm=32.447, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.231e-04, train_time=1.547 +[gpub022:0/16] 2024-02-02 13:08:27,979 (trainer:737) INFO: 21epoch:train:1401-1500batch: iter_time=1.077e-04, forward_time=0.290, loss_ctc=47.843, loss_att=48.842, acc=0.739, loss=48.542, backward_time=0.403, grad_norm=34.472, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.231e-04, train_time=1.408 +[gpub022:0/16] 2024-02-02 13:11:00,411 (trainer:737) INFO: 
21epoch:train:1501-1600batch: iter_time=1.043e-04, forward_time=0.388, loss_ctc=50.996, loss_att=51.693, acc=0.745, loss=51.484, backward_time=0.455, grad_norm=32.404, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.230e-04, train_time=1.524 +[gpub022:0/16] 2024-02-02 13:11:44,267 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-02 13:13:28,181 (trainer:737) INFO: 21epoch:train:1601-1700batch: iter_time=1.037e-04, forward_time=0.292, loss_ctc=46.618, loss_att=45.328, acc=0.769, loss=45.715, backward_time=0.406, grad_norm=31.743, clip=100.000, loss_scale=6.661e+33, optim_step_time=0.093, optim0_lr0=2.230e-04, train_time=1.477 +[gpub022:0/16] 2024-02-02 13:15:58,478 (trainer:737) INFO: 21epoch:train:1701-1800batch: iter_time=1.006e-04, forward_time=0.289, loss_ctc=42.014, loss_att=37.727, acc=0.769, loss=39.013, backward_time=0.401, grad_norm=30.700, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.230e-04, train_time=1.503 +[gpub022:0/16] 2024-02-02 13:18:26,487 (trainer:737) INFO: 21epoch:train:1801-1900batch: iter_time=1.029e-04, forward_time=0.314, loss_ctc=47.242, loss_att=50.100, acc=0.751, loss=49.243, backward_time=0.432, grad_norm=35.416, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.229e-04, train_time=1.480 +[gpub022:0/16] 2024-02-02 13:21:01,181 (trainer:737) INFO: 21epoch:train:1901-2000batch: iter_time=1.183e-04, forward_time=0.366, loss_ctc=52.141, loss_att=46.841, acc=0.755, loss=48.431, backward_time=0.425, grad_norm=35.702, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.229e-04, train_time=1.546 +[gpub022:0/16] 2024-02-02 13:23:25,981 (trainer:737) INFO: 21epoch:train:2001-2100batch: iter_time=1.026e-04, forward_time=0.306, loss_ctc=50.630, loss_att=44.509, acc=0.740, loss=46.345, backward_time=0.402, grad_norm=35.453, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.229e-04, 
train_time=1.448 +[gpub022:0/16] 2024-02-02 13:25:57,744 (trainer:737) INFO: 21epoch:train:2101-2200batch: iter_time=1.047e-04, forward_time=0.320, loss_ctc=48.337, loss_att=52.646, acc=0.736, loss=51.353, backward_time=0.417, grad_norm=30.746, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.228e-04, train_time=1.517 +[gpub022:0/16] 2024-02-02 13:28:24,942 (trainer:737) INFO: 21epoch:train:2201-2300batch: iter_time=1.055e-04, forward_time=0.336, loss_ctc=54.530, loss_att=62.780, acc=0.738, loss=60.305, backward_time=0.434, grad_norm=35.101, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.228e-04, train_time=1.472 +[gpub022:0/16] 2024-02-02 13:30:53,286 (trainer:737) INFO: 21epoch:train:2301-2400batch: iter_time=1.039e-04, forward_time=0.293, loss_ctc=50.852, loss_att=52.375, acc=0.750, loss=51.918, backward_time=0.408, grad_norm=32.084, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.228e-04, train_time=1.483 +[gpub022:0/16] 2024-02-02 13:33:25,751 (trainer:737) INFO: 21epoch:train:2401-2500batch: iter_time=1.062e-04, forward_time=0.289, loss_ctc=42.976, loss_att=44.027, acc=0.749, loss=43.712, backward_time=0.402, grad_norm=31.972, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.227e-04, train_time=1.525 +[gpub022:0/16] 2024-02-02 13:33:45,779 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub022:0/16] 2024-02-02 13:34:04,402 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 13:34:08,003 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 13:34:08,003 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub022:0/16] 2024-02-02 13:34:08,037 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 13:41:25,730 (trainer:737) INFO: 21epoch:train:2501-2600batch: iter_time=3.264, forward_time=0.367, loss_ctc=50.110, loss_att=44.477, acc=0.750, loss=46.167, backward_time=0.468, grad_norm=35.130, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.227e-04, train_time=4.800 +[gpub022:0/16] 2024-02-02 13:44:00,961 (trainer:737) INFO: 21epoch:train:2601-2700batch: iter_time=9.482e-05, forward_time=0.291, loss_ctc=50.837, loss_att=49.332, acc=0.753, loss=49.783, backward_time=0.402, grad_norm=116.480, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.226e-04, train_time=1.552 +[gpub022:0/16] 2024-02-02 13:46:24,107 (trainer:737) INFO: 21epoch:train:2701-2800batch: iter_time=9.674e-05, forward_time=0.292, loss_ctc=47.715, loss_att=46.615, acc=0.748, loss=46.945, backward_time=0.405, grad_norm=31.635, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.226e-04, train_time=1.431 +[gpub022:0/16] 2024-02-02 13:48:49,180 (trainer:737) 
INFO: 21epoch:train:2801-2900batch: iter_time=2.209e-04, forward_time=0.320, loss_ctc=51.288, loss_att=50.544, acc=0.767, loss=50.767, backward_time=0.438, grad_norm=32.343, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.226e-04, train_time=1.451 +[gpub022:0/16] 2024-02-02 13:51:36,604 (trainer:737) INFO: 21epoch:train:2901-3000batch: iter_time=9.526e-05, forward_time=0.340, loss_ctc=39.646, loss_att=37.346, acc=0.773, loss=38.036, backward_time=0.446, grad_norm=37.116, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.225e-04, train_time=1.674 +[gpub022:0/16] 2024-02-02 13:53:53,083 (trainer:737) INFO: 21epoch:train:3001-3100batch: iter_time=9.862e-05, forward_time=0.290, loss_ctc=42.267, loss_att=41.226, acc=0.760, loss=41.538, backward_time=0.403, grad_norm=30.669, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.225e-04, train_time=1.365 +[gpub022:0/16] 2024-02-02 13:56:16,808 (trainer:737) INFO: 21epoch:train:3101-3200batch: iter_time=8.900e-05, forward_time=0.293, loss_ctc=50.934, loss_att=48.325, acc=0.763, loss=49.108, backward_time=0.405, grad_norm=35.150, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.225e-04, train_time=1.437 +[gpub022:0/16] 2024-02-02 13:58:56,933 (trainer:737) INFO: 21epoch:train:3201-3300batch: iter_time=1.027e-04, forward_time=0.305, loss_ctc=48.666, loss_att=44.255, acc=0.746, loss=45.578, backward_time=0.425, grad_norm=34.257, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.224e-04, train_time=1.601 +[gpub022:0/16] 2024-02-02 14:01:26,361 (trainer:737) INFO: 21epoch:train:3301-3400batch: iter_time=9.514e-05, forward_time=0.329, loss_ctc=53.653, loss_att=51.952, acc=0.726, loss=52.462, backward_time=0.451, grad_norm=34.630, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.224e-04, train_time=1.494 +[gpub022:0/16] 2024-02-02 14:03:46,534 (trainer:737) INFO: 21epoch:train:3401-3500batch: 
iter_time=9.385e-05, forward_time=0.293, loss_ctc=49.108, loss_att=55.785, acc=0.748, loss=53.782, backward_time=0.407, grad_norm=32.339, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.223e-04, train_time=1.402 +[gpub022:0/16] 2024-02-02 14:06:17,185 (trainer:737) INFO: 21epoch:train:3501-3600batch: iter_time=9.514e-05, forward_time=0.292, loss_ctc=49.679, loss_att=57.405, acc=0.736, loss=55.087, backward_time=0.404, grad_norm=33.537, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.223e-04, train_time=1.506 +[gpub022:0/16] 2024-02-02 14:08:55,190 (trainer:737) INFO: 21epoch:train:3601-3700batch: iter_time=4.177e-04, forward_time=0.393, loss_ctc=49.352, loss_att=51.688, acc=0.761, loss=50.987, backward_time=0.451, grad_norm=33.156, clip=100.000, loss_scale=8.879e+33, optim_step_time=0.105, optim0_lr0=2.223e-04, train_time=1.580 +[gpub022:0/16] 2024-02-02 14:10:22,834 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub022:0/16] 2024-02-02 14:10:42,164 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 14:10:45,800 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 14:10:45,800 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub022:0/16] 2024-02-02 14:10:45,804 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 14:16:57,396 
(trainer:737) INFO: 21epoch:train:3701-3800batch: iter_time=3.357, forward_time=0.296, loss_ctc=48.135, loss_att=45.165, acc=0.732, loss=46.056, backward_time=0.406, grad_norm=34.676, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.222e-04, train_time=4.822 +[gpub022:0/16] 2024-02-02 14:19:19,934 (trainer:737) INFO: 21epoch:train:3801-3900batch: iter_time=8.509e-05, forward_time=0.312, loss_ctc=48.782, loss_att=44.826, acc=0.752, loss=46.013, backward_time=0.416, grad_norm=35.074, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.222e-04, train_time=1.425 +[gpub022:0/16] 2024-02-02 14:22:03,748 (trainer:737) INFO: 21epoch:train:3901-4000batch: iter_time=8.683e-05, forward_time=0.328, loss_ctc=47.294, loss_att=49.501, acc=0.729, loss=48.839, backward_time=0.442, grad_norm=34.061, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.222e-04, train_time=1.638 +[gpub022:0/16] 2024-02-02 14:22:48,257 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-02 14:24:40,064 (trainer:737) INFO: 21epoch:train:4001-4100batch: iter_time=9.147e-05, forward_time=0.301, loss_ctc=50.790, loss_att=51.170, acc=0.737, loss=51.056, backward_time=0.419, grad_norm=32.999, clip=100.000, loss_scale=6.871e+33, optim_step_time=0.094, optim0_lr0=2.221e-04, train_time=1.563 +[gpub022:0/16] 2024-02-02 14:27:17,123 (trainer:737) INFO: 21epoch:train:4101-4200batch: iter_time=9.040e-05, forward_time=0.315, loss_ctc=45.709, loss_att=45.010, acc=0.761, loss=45.220, backward_time=0.417, grad_norm=32.605, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.221e-04, train_time=1.570 +[gpub022:0/16] 2024-02-02 14:29:45,524 (trainer:737) INFO: 21epoch:train:4201-4300batch: iter_time=9.041e-05, forward_time=0.329, loss_ctc=41.093, loss_att=36.693, acc=0.769, loss=38.013, backward_time=0.446, grad_norm=30.488, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.221e-04, train_time=1.484 +[gpub022:0/16] 2024-02-02 14:32:33,547 (trainer:737) INFO: 21epoch:train:4301-4400batch: iter_time=1.470e-04, forward_time=0.292, loss_ctc=46.073, loss_att=49.517, acc=0.743, loss=48.484, backward_time=0.410, grad_norm=36.374, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.220e-04, train_time=1.680 +[gpub022:0/16] 2024-02-02 14:34:56,333 (trainer:737) INFO: 21epoch:train:4401-4500batch: iter_time=1.001e-04, forward_time=0.292, loss_ctc=51.258, loss_att=47.081, acc=0.747, loss=48.334, backward_time=0.406, grad_norm=35.976, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.220e-04, train_time=1.427 +[gpub022:0/16] 2024-02-02 14:37:41,375 (trainer:737) INFO: 21epoch:train:4501-4600batch: iter_time=9.777e-05, forward_time=0.304, loss_ctc=49.076, loss_att=43.200, acc=0.740, loss=44.963, backward_time=0.425, grad_norm=36.012, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.219e-04, train_time=1.650 +[gpub022:0/16] 2024-02-02 14:40:08,619 
(trainer:737) INFO: 21epoch:train:4601-4700batch: iter_time=9.609e-05, forward_time=0.319, loss_ctc=48.084, loss_att=54.493, acc=0.718, loss=52.571, backward_time=0.458, grad_norm=32.122, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.219e-04, train_time=1.472 +[gpub022:0/16] 2024-02-02 14:42:58,745 (trainer:737) INFO: 21epoch:train:4701-4800batch: iter_time=9.476e-05, forward_time=0.292, loss_ctc=53.553, loss_att=63.101, acc=0.734, loss=60.237, backward_time=0.407, grad_norm=35.204, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.219e-04, train_time=1.701 +[gpub022:0/16] 2024-02-02 14:45:23,159 (trainer:737) INFO: 21epoch:train:4801-4900batch: iter_time=9.810e-05, forward_time=0.300, loss_ctc=50.286, loss_att=50.398, acc=0.747, loss=50.364, backward_time=0.421, grad_norm=33.315, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.218e-04, train_time=1.443 +[gpub022:0/16] 2024-02-02 14:48:11,585 (trainer:737) INFO: 21epoch:train:4901-5000batch: iter_time=9.409e-05, forward_time=0.339, loss_ctc=42.720, loss_att=45.160, acc=0.738, loss=44.428, backward_time=0.420, grad_norm=32.959, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.218e-04, train_time=1.685 +[gpub022:0/16] 2024-02-02 14:48:31,613 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub022:0/16] 2024-02-02 14:48:50,989 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 14:48:54,670 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 14:48:54,670 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub022:0/16] 2024-02-02 14:48:54,673 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 14:55:16,509 (trainer:737) INFO: 21epoch:train:5001-5100batch: iter_time=2.788, forward_time=0.317, loss_ctc=49.704, loss_att=44.894, acc=0.736, loss=46.337, backward_time=0.414, grad_norm=34.987, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.218e-04, train_time=4.249 +[gpub022:0/16] 2024-02-02 14:58:06,971 (trainer:737) INFO: 21epoch:train:5101-5200batch: iter_time=9.511e-05, forward_time=0.374, loss_ctc=50.262, loss_att=49.607, acc=0.737, loss=49.803, backward_time=0.416, grad_norm=37.529, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.217e-04, train_time=1.705 +[gpub022:0/16] 2024-02-02 15:00:30,578 (trainer:737) INFO: 21epoch:train:5201-5300batch: iter_time=4.309e-04, forward_time=0.372, loss_ctc=47.960, loss_att=46.830, acc=0.742, loss=47.169, backward_time=0.445, grad_norm=32.627, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.217e-04, train_time=1.436 +[gpub022:0/16] 2024-02-02 15:02:57,420 (trainer:737) INFO: 
21epoch:train:5301-5400batch: iter_time=9.048e-05, forward_time=0.304, loss_ctc=51.808, loss_att=49.763, acc=0.761, loss=50.377, backward_time=0.414, grad_norm=36.240, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.217e-04, train_time=1.468 +[gpub022:0/16] 2024-02-02 15:05:36,724 (trainer:737) INFO: 21epoch:train:5401-5500batch: iter_time=9.576e-05, forward_time=0.342, loss_ctc=39.458, loss_att=36.242, acc=0.770, loss=37.207, backward_time=0.435, grad_norm=29.168, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.216e-04, train_time=1.593 +[gpub022:0/16] 2024-02-02 15:07:51,640 (trainer:737) INFO: 21epoch:train:5501-5600batch: iter_time=8.799e-05, forward_time=0.319, loss_ctc=42.001, loss_att=40.708, acc=0.756, loss=41.096, backward_time=0.415, grad_norm=31.103, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.216e-04, train_time=1.349 +[gpub022:0/16] 2024-02-02 15:10:35,269 (trainer:737) INFO: 21epoch:train:5601-5700batch: iter_time=1.118e-04, forward_time=0.330, loss_ctc=50.924, loss_att=48.426, acc=0.757, loss=49.175, backward_time=0.438, grad_norm=36.553, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.215e-04, train_time=1.636 +[gpub022:0/16] 2024-02-02 15:13:15,552 (trainer:737) INFO: 21epoch:train:5701-5800batch: iter_time=9.277e-05, forward_time=0.306, loss_ctc=48.318, loss_att=43.621, acc=0.744, loss=45.030, backward_time=0.411, grad_norm=33.043, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.215e-04, train_time=1.603 +[gpub022:0/16] 2024-02-02 15:15:41,057 (trainer:737) INFO: 21epoch:train:5801-5900batch: iter_time=8.630e-05, forward_time=0.380, loss_ctc=52.857, loss_att=52.999, acc=0.712, loss=52.956, backward_time=0.416, grad_norm=37.462, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.215e-04, train_time=1.455 +[gpub022:0/16] 2024-02-02 15:18:29,041 (trainer:737) INFO: 21epoch:train:5901-6000batch: 
iter_time=9.219e-05, forward_time=0.316, loss_ctc=48.679, loss_att=54.701, acc=0.743, loss=52.894, backward_time=0.419, grad_norm=32.345, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.214e-04, train_time=1.679 +[gpub022:0/16] 2024-02-02 15:21:01,530 (trainer:737) INFO: 21epoch:train:6001-6100batch: iter_time=8.570e-05, forward_time=0.336, loss_ctc=49.557, loss_att=56.770, acc=0.732, loss=54.606, backward_time=0.445, grad_norm=35.168, clip=100.000, loss_scale=8.671e+33, optim_step_time=0.102, optim0_lr0=2.214e-04, train_time=1.525 +[gpub022:0/16] 2024-02-02 15:23:40,695 (trainer:737) INFO: 21epoch:train:6101-6200batch: iter_time=9.441e-05, forward_time=0.313, loss_ctc=48.862, loss_att=51.031, acc=0.757, loss=50.380, backward_time=0.429, grad_norm=32.421, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.214e-04, train_time=1.591 +[gpub022:0/16] 2024-02-02 15:25:15,384 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub022:0/16] 2024-02-02 15:25:34,818 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 15:25:38,291 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 15:25:38,291 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub022:0/16] 2024-02-02 15:25:38,296 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 15:30:32,770 
(trainer:737) INFO: 21epoch:train:6201-6300batch: iter_time=2.595, forward_time=0.346, loss_ctc=48.005, loss_att=45.155, acc=0.723, loss=46.010, backward_time=0.426, grad_norm=33.779, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.213e-04, train_time=4.121 +[gpub022:0/16] 2024-02-02 15:32:45,388 (trainer:737) INFO: 21epoch:train:6301-6400batch: iter_time=8.412e-05, forward_time=0.305, loss_ctc=48.457, loss_att=42.938, acc=0.757, loss=44.594, backward_time=0.409, grad_norm=33.045, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.213e-04, train_time=1.326 +[gpub022:0/16] 2024-02-02 15:35:21,882 (trainer:737) INFO: 21epoch:train:6401-6500batch: iter_time=8.158e-05, forward_time=0.312, loss_ctc=47.333, loss_att=48.781, acc=0.732, loss=48.347, backward_time=0.412, grad_norm=33.945, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.213e-04, train_time=1.564 +[gpub022:0/16] 2024-02-02 15:36:22,775 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-02 15:37:49,832 (trainer:737) INFO: 21epoch:train:6501-6600batch: iter_time=9.756e-05, forward_time=0.331, loss_ctc=50.914, loss_att=50.773, acc=0.739, loss=50.815, backward_time=0.422, grad_norm=33.246, clip=100.000, loss_scale=7.448e+33, optim_step_time=0.096, optim0_lr0=2.212e-04, train_time=1.480 +[gpub022:0/16] 2024-02-02 15:40:09,574 (trainer:737) INFO: 21epoch:train:6601-6700batch: iter_time=9.218e-05, forward_time=0.311, loss_ctc=45.425, loss_att=44.397, acc=0.766, loss=44.705, backward_time=0.412, grad_norm=30.942, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.212e-04, train_time=1.397 +[gpub022:0/16] 2024-02-02 15:42:52,684 (trainer:737) INFO: 21epoch:train:6701-6800batch: iter_time=4.538e-04, forward_time=0.316, loss_ctc=41.408, loss_att=36.314, acc=0.770, loss=37.842, backward_time=0.425, grad_norm=29.911, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.211e-04, train_time=1.630 +[gpub022:0/16] 2024-02-02 15:45:10,456 (trainer:737) INFO: 21epoch:train:6801-6900batch: iter_time=1.037e-04, forward_time=0.307, loss_ctc=46.278, loss_att=49.456, acc=0.744, loss=48.502, backward_time=0.407, grad_norm=36.031, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.211e-04, train_time=1.378 +[gpub022:0/16] 2024-02-02 15:47:28,374 (trainer:737) INFO: 21epoch:train:6901-7000batch: iter_time=1.014e-04, forward_time=0.309, loss_ctc=50.971, loss_att=46.064, acc=0.750, loss=47.536, backward_time=0.413, grad_norm=34.703, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.211e-04, train_time=1.379 +[gpub022:0/16] 2024-02-02 15:50:20,958 (trainer:737) INFO: 21epoch:train:7001-7100batch: iter_time=1.784e-04, forward_time=0.377, loss_ctc=48.719, loss_att=42.684, acc=0.742, loss=44.494, backward_time=0.433, grad_norm=33.485, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.210e-04, train_time=1.725 +[gpub022:0/16] 2024-02-02 15:50:36,505 
(trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-02 15:52:45,862 (trainer:737) INFO: 21epoch:train:7101-7200batch: iter_time=9.972e-05, forward_time=0.290, loss_ctc=47.253, loss_att=53.170, acc=0.721, loss=51.395, backward_time=0.403, grad_norm=31.276, clip=100.000, loss_scale=2.885e+33, optim_step_time=0.093, optim0_lr0=2.210e-04, train_time=1.449 +[gpub022:0/16] 2024-02-02 15:55:23,320 (trainer:737) INFO: 21epoch:train:7201-7300batch: iter_time=9.592e-05, forward_time=0.318, loss_ctc=52.733, loss_att=61.774, acc=0.738, loss=59.062, backward_time=0.415, grad_norm=34.648, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.210e-04, train_time=1.574 +[gpub022:0/16] 2024-02-02 15:57:53,979 (trainer:737) INFO: 21epoch:train:7301-7400batch: iter_time=8.758e-05, forward_time=0.350, loss_ctc=50.015, loss_att=49.987, acc=0.748, loss=49.996, backward_time=0.445, grad_norm=32.072, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.209e-04, train_time=1.506 +[gpub022:0/16] 2024-02-02 16:00:23,127 (trainer:737) INFO: 21epoch:train:7401-7500batch: iter_time=9.089e-05, forward_time=0.289, loss_ctc=42.578, loss_att=45.182, acc=0.739, loss=44.401, backward_time=0.403, grad_norm=31.879, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.209e-04, train_time=1.492 +[gpub022:0/16] 2024-02-02 16:00:43,156 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub022:0/16] 2024-02-02 16:01:02,302 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 16:01:05,832 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 16:01:05,832 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub022:0/16] 2024-02-02 16:01:05,836 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 16:08:09,236 (trainer:737) INFO: 21epoch:train:7501-7600batch: iter_time=3.200, forward_time=0.393, loss_ctc=49.537, loss_att=46.084, acc=0.746, loss=47.120, backward_time=0.429, grad_norm=37.299, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.209e-04, train_time=4.660 +[gpub022:0/16] 2024-02-02 16:10:33,209 (trainer:737) INFO: 21epoch:train:7601-7700batch: iter_time=1.020e-04, forward_time=0.292, loss_ctc=48.738, loss_att=49.240, acc=0.755, loss=49.089, backward_time=0.405, grad_norm=37.021, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.208e-04, train_time=1.440 +[gpub022:0/16] 2024-02-02 16:13:31,364 (trainer:737) INFO: 21epoch:train:7701-7800batch: iter_time=9.727e-05, forward_time=0.340, loss_ctc=47.761, loss_att=47.547, acc=0.748, loss=47.611, backward_time=0.458, grad_norm=33.959, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.208e-04, train_time=1.781 +[gpub022:0/16] 2024-02-02 16:15:54,718 (trainer:737) INFO: 
21epoch:train:7801-7900batch: iter_time=1.013e-04, forward_time=0.309, loss_ctc=50.824, loss_att=50.668, acc=0.767, loss=50.715, backward_time=0.411, grad_norm=34.680, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.208e-04, train_time=1.433 +[gpub022:0/16] 2024-02-02 16:18:38,624 (trainer:737) INFO: 21epoch:train:7901-8000batch: iter_time=5.222e-04, forward_time=0.372, loss_ctc=39.267, loss_att=37.285, acc=0.775, loss=37.880, backward_time=0.414, grad_norm=29.655, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.207e-04, train_time=1.639 +[gpub022:0/16] 2024-02-02 16:21:08,344 (trainer:737) INFO: 21epoch:train:8001-8100batch: iter_time=1.047e-04, forward_time=0.289, loss_ctc=41.668, loss_att=41.205, acc=0.762, loss=41.344, backward_time=0.402, grad_norm=29.460, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.207e-04, train_time=1.497 +[gpub022:0/16] 2024-02-02 16:23:39,971 (trainer:737) INFO: 21epoch:train:8101-8200batch: iter_time=1.822e-04, forward_time=0.357, loss_ctc=50.305, loss_att=48.209, acc=0.766, loss=48.837, backward_time=0.478, grad_norm=35.369, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.206e-04, train_time=1.515 +[gpub022:0/16] 2024-02-02 16:26:26,361 (trainer:737) INFO: 21epoch:train:8201-8300batch: iter_time=9.425e-05, forward_time=0.289, loss_ctc=47.898, loss_att=43.214, acc=0.752, loss=44.619, backward_time=0.401, grad_norm=32.714, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.206e-04, train_time=1.665 +[gpub022:0/16] 2024-02-02 16:28:56,023 (trainer:737) INFO: 21epoch:train:8301-8400batch: iter_time=1.004e-04, forward_time=0.291, loss_ctc=52.050, loss_att=52.045, acc=0.725, loss=52.046, backward_time=0.403, grad_norm=36.890, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.206e-04, train_time=1.496 +[gpub022:0/16] 2024-02-02 16:31:37,233 (trainer:737) INFO: 21epoch:train:8401-8500batch: 
iter_time=9.727e-05, forward_time=0.401, loss_ctc=48.169, loss_att=55.501, acc=0.751, loss=53.302, backward_time=0.421, grad_norm=32.291, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.205e-04, train_time=1.610 +[gpub022:0/16] 2024-02-02 16:34:01,676 (trainer:737) INFO: 21epoch:train:8501-8600batch: iter_time=8.906e-05, forward_time=0.294, loss_ctc=48.836, loss_att=57.361, acc=0.738, loss=54.803, backward_time=0.405, grad_norm=34.444, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.205e-04, train_time=1.446 +[gpub022:0/16] 2024-02-02 16:36:43,531 (trainer:737) INFO: 21epoch:train:8601-8700batch: iter_time=6.487e-04, forward_time=0.362, loss_ctc=48.270, loss_att=50.141, acc=0.765, loss=49.580, backward_time=0.462, grad_norm=32.111, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.205e-04, train_time=1.618 +[gpub022:0/16] 2024-02-02 16:38:33,038 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub022:0/16] 2024-02-02 16:38:52,734 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 16:38:56,509 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 16:38:56,510 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub022:0/16] 2024-02-02 16:38:56,513 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 16:44:18,999 
(trainer:737) INFO: 21epoch:train:8701-8800batch: iter_time=3.004, forward_time=0.287, loss_ctc=47.394, loss_att=45.375, acc=0.730, loss=45.981, backward_time=0.401, grad_norm=35.769, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.204e-04, train_time=4.554 +[gpub022:0/16] 2024-02-02 16:47:05,485 (trainer:737) INFO: 21epoch:train:8801-8900batch: iter_time=9.403e-05, forward_time=0.288, loss_ctc=48.027, loss_att=43.515, acc=0.757, loss=44.868, backward_time=0.400, grad_norm=33.538, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.204e-04, train_time=1.665 +[gpub022:0/16] 2024-02-02 16:49:52,146 (trainer:737) INFO: 21epoch:train:8901-9000batch: iter_time=8.334e-05, forward_time=0.369, loss_ctc=46.987, loss_att=48.890, acc=0.733, loss=48.319, backward_time=0.442, grad_norm=33.356, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=2.204e-04, train_time=1.666 +[gpub022:0/16] 2024-02-02 16:52:34,962 (trainer:737) INFO: 21epoch:train:9001-9100batch: iter_time=9.046e-05, forward_time=0.291, loss_ctc=50.691, loss_att=50.697, acc=0.740, loss=50.695, backward_time=0.404, grad_norm=33.112, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.203e-04, train_time=1.627 +[gpub022:0/16] 2024-02-02 16:53:38,729 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub022:0/16] 2024-02-02 16:54:49,318 (trainer:737) INFO: 21epoch:train:9101-9200batch: iter_time=9.124e-05, forward_time=0.292, loss_ctc=45.588, loss_att=44.003, acc=0.766, loss=44.478, backward_time=0.406, grad_norm=31.415, clip=100.000, loss_scale=3.514e+33, optim_step_time=0.093, optim0_lr0=2.203e-04, train_time=1.345 +[gpub022:0/16] 2024-02-02 16:57:40,713 (trainer:737) INFO: 21epoch:train:9201-9300batch: iter_time=8.706e-05, forward_time=0.343, loss_ctc=40.761, loss_att=35.875, acc=0.773, loss=37.341, backward_time=0.442, grad_norm=30.573, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.203e-04, train_time=1.714 +[gpub022:0/16] 2024-02-02 17:00:23,829 (trainer:737) INFO: 21epoch:train:9301-9400batch: iter_time=9.011e-05, forward_time=0.290, loss_ctc=45.833, loss_att=49.601, acc=0.744, loss=48.470, backward_time=0.403, grad_norm=39.279, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.202e-04, train_time=1.631 +[gpub022:0/16] 2024-02-02 17:02:38,314 (trainer:737) INFO: 21epoch:train:9401-9500batch: iter_time=8.721e-05, forward_time=0.291, loss_ctc=51.048, loss_att=46.184, acc=0.751, loss=47.643, backward_time=0.406, grad_norm=35.158, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.202e-04, train_time=1.344 +[gpub022:0/16] 2024-02-02 17:05:49,459 (trainer:737) INFO: 21epoch:train:9501-9600batch: iter_time=2.999e-04, forward_time=0.392, loss_ctc=48.423, loss_att=43.166, acc=0.741, loss=44.743, backward_time=0.430, grad_norm=34.091, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.201e-04, train_time=1.912 +[gpub022:0/16] 2024-02-02 17:08:02,343 (trainer:737) INFO: 21epoch:train:9601-9700batch: iter_time=8.953e-05, forward_time=0.291, loss_ctc=47.161, loss_att=53.554, acc=0.723, loss=51.636, backward_time=0.405, grad_norm=30.865, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.201e-04, train_time=1.329 +[gpub022:0/16] 2024-02-02 17:10:34,482 
(trainer:737) INFO: 21epoch:train:9701-9800batch: iter_time=1.027e-04, forward_time=0.296, loss_ctc=52.666, loss_att=61.270, acc=0.742, loss=58.689, backward_time=0.409, grad_norm=34.872, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.201e-04, train_time=1.520 +[gpub022:0/16] 2024-02-02 17:13:27,255 (trainer:737) INFO: 21epoch:train:9801-9900batch: iter_time=2.645e-04, forward_time=0.370, loss_ctc=49.378, loss_att=50.030, acc=0.746, loss=49.835, backward_time=0.429, grad_norm=33.465, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.200e-04, train_time=1.728 +[gpub022:0/16] 2024-02-02 17:15:55,642 (trainer:737) INFO: 21epoch:train:9901-10000batch: iter_time=8.740e-05, forward_time=0.290, loss_ctc=42.113, loss_att=44.928, acc=0.740, loss=44.084, backward_time=0.403, grad_norm=52.916, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.200e-04, train_time=1.484 +[gpub022:0/16] 2024-02-02 17:16:15,671 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub022:0/16] 2024-02-02 17:16:35,655 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 17:16:39,153 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 17:16:39,153 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub022:0/16] 2024-02-02 17:16:39,157 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 17:23:55,691 (trainer:737) INFO: 21epoch:train:10001-10100batch: iter_time=3.060, forward_time=0.299, loss_ctc=48.814, loss_att=45.547, acc=0.749, loss=46.527, backward_time=0.420, grad_norm=35.246, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.200e-04, train_time=4.800 +[gpub022:0/16] 2024-02-02 17:26:21,479 (trainer:737) INFO: 21epoch:train:10101-10200batch: iter_time=9.579e-05, forward_time=0.395, loss_ctc=49.626, loss_att=49.492, acc=0.755, loss=49.532, backward_time=0.424, grad_norm=35.701, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.199e-04, train_time=1.458 +[gpub022:0/16] 2024-02-02 17:29:00,080 (trainer:737) INFO: 21epoch:train:10201-10300batch: iter_time=8.838e-05, forward_time=0.292, loss_ctc=46.998, loss_att=46.189, acc=0.751, loss=46.432, backward_time=0.404, grad_norm=31.979, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.199e-04, train_time=1.585 +[gpub022:0/16] 2024-02-02 17:31:43,701 (trainer:737) 
INFO: 21epoch:train:10301-10400batch: iter_time=8.677e-05, forward_time=0.367, loss_ctc=50.539, loss_att=51.215, acc=0.767, loss=51.012, backward_time=0.450, grad_norm=33.880, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.199e-04, train_time=1.637 +[gpub022:0/16] 2024-02-02 17:34:07,748 (trainer:737) INFO: 21epoch:train:10401-10500batch: iter_time=9.474e-05, forward_time=0.290, loss_ctc=38.635, loss_att=36.654, acc=0.778, loss=37.248, backward_time=0.403, grad_norm=28.235, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.198e-04, train_time=1.440 +[gpub022:0/16] 2024-02-02 17:36:28,533 (trainer:737) INFO: 21epoch:train:10501-10600batch: iter_time=9.733e-05, forward_time=0.291, loss_ctc=41.523, loss_att=41.277, acc=0.762, loss=41.351, backward_time=0.402, grad_norm=31.057, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.198e-04, train_time=1.408 +[gpub022:0/16] 2024-02-02 17:39:31,091 (trainer:737) INFO: 21epoch:train:10601-10700batch: iter_time=1.086e-04, forward_time=0.378, loss_ctc=49.788, loss_att=48.054, acc=0.766, loss=48.574, backward_time=0.435, grad_norm=36.664, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.106, optim0_lr0=2.198e-04, train_time=1.825 +[gpub022:0/16] 2024-02-02 17:41:45,223 (trainer:737) INFO: 21epoch:train:10701-10800batch: iter_time=9.939e-05, forward_time=0.291, loss_ctc=47.560, loss_att=43.355, acc=0.752, loss=44.616, backward_time=0.403, grad_norm=34.577, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.197e-04, train_time=1.341 +[gpub022:0/16] 2024-02-02 17:44:33,195 (trainer:737) INFO: 21epoch:train:10801-10900batch: iter_time=9.864e-05, forward_time=0.292, loss_ctc=52.114, loss_att=51.969, acc=0.726, loss=52.012, backward_time=0.406, grad_norm=37.098, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.197e-04, train_time=1.679 +[gpub022:0/16] 2024-02-02 17:47:19,813 (trainer:737) INFO: 
21epoch:train:10901-11000batch: iter_time=8.868e-05, forward_time=0.380, loss_ctc=47.664, loss_att=54.464, acc=0.753, loss=52.424, backward_time=0.447, grad_norm=31.747, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.196e-04, train_time=1.666 +[gpub022:0/16] 2024-02-02 17:49:42,267 (trainer:737) INFO: 21epoch:train:11001-11100batch: iter_time=9.480e-05, forward_time=0.293, loss_ctc=48.638, loss_att=56.965, acc=0.741, loss=54.467, backward_time=0.404, grad_norm=35.877, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.196e-04, train_time=1.425 +[gpub022:0/16] 2024-02-02 17:52:38,622 (trainer:737) INFO: 21epoch:train:11101-11200batch: iter_time=8.933e-05, forward_time=0.302, loss_ctc=48.540, loss_att=50.652, acc=0.765, loss=50.018, backward_time=0.408, grad_norm=32.346, clip=100.000, loss_scale=3.946e+33, optim_step_time=0.094, optim0_lr0=2.196e-04, train_time=1.763 +[gpub022:0/16] 2024-02-02 17:54:08,195 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub022:0/16] 2024-02-02 17:54:27,649 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 17:54:31,293 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 17:54:31,293 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub022:0/16] 2024-02-02 17:54:31,296 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 18:00:36,271 (trainer:737) INFO: 21epoch:train:11201-11300batch: iter_time=3.295, forward_time=0.382, loss_ctc=47.342, loss_att=44.408, acc=0.738, loss=45.288, backward_time=0.421, grad_norm=35.502, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.195e-04, train_time=4.776 +[gpub022:0/16] 2024-02-02 18:03:15,251 (trainer:737) INFO: 21epoch:train:11301-11400batch: iter_time=8.245e-05, forward_time=0.291, loss_ctc=47.495, loss_att=41.663, acc=0.776, loss=43.413, backward_time=0.404, grad_norm=31.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.195e-04, train_time=1.590 +[gpub022:0/16] 2024-02-02 18:06:04,506 (trainer:737) INFO: 21epoch:train:11401-11500batch: iter_time=7.859e-05, forward_time=0.364, loss_ctc=46.800, loss_att=47.962, acc=0.745, loss=47.614, backward_time=0.417, grad_norm=34.232, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.195e-04, train_time=1.692 +[gpub022:0/16] 2024-02-02 18:08:19,993 (trainer:737) 
INFO: 21epoch:train:11501-11600batch: iter_time=9.063e-05, forward_time=0.294, loss_ctc=50.231, loss_att=50.917, acc=0.750, loss=50.711, backward_time=0.409, grad_norm=31.942, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.194e-04, train_time=1.355 +[gpub022:0/16] 2024-02-02 18:11:04,237 (trainer:737) INFO: 21epoch:train:11601-11700batch: iter_time=8.813e-05, forward_time=0.295, loss_ctc=45.072, loss_att=44.399, acc=0.776, loss=44.601, backward_time=0.408, grad_norm=31.129, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.194e-04, train_time=1.642 +[gpub022:0/16] 2024-02-02 18:13:45,621 (trainer:737) INFO: 21epoch:train:11701-11800batch: iter_time=9.722e-05, forward_time=0.343, loss_ctc=40.751, loss_att=36.568, acc=0.775, loss=37.823, backward_time=0.451, grad_norm=29.625, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.194e-04, train_time=1.614 +[gpub022:0/16] 2024-02-02 18:16:09,548 (trainer:737) INFO: 21epoch:train:11801-11900batch: iter_time=8.977e-05, forward_time=0.292, loss_ctc=45.510, loss_att=49.196, acc=0.755, loss=48.090, backward_time=0.405, grad_norm=37.965, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.193e-04, train_time=1.439 +[gpub022:0/16] 2024-02-02 18:19:01,564 (trainer:737) INFO: 21epoch:train:11901-12000batch: iter_time=8.695e-05, forward_time=0.321, loss_ctc=50.508, loss_att=45.837, acc=0.759, loss=47.238, backward_time=0.426, grad_norm=35.204, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.193e-04, train_time=1.720 +[gpub022:0/16] 2024-02-02 18:21:45,418 (trainer:737) INFO: 21epoch:train:12001-12100batch: iter_time=8.670e-05, forward_time=0.289, loss_ctc=47.979, loss_att=42.776, acc=0.747, loss=44.337, backward_time=0.401, grad_norm=34.044, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.193e-04, train_time=1.638 +[gpub022:0/16] 2024-02-02 18:24:39,454 (trainer:737) INFO: 
21epoch:train:12101-12200batch: iter_time=9.005e-05, forward_time=0.364, loss_ctc=46.868, loss_att=52.053, acc=0.742, loss=50.498, backward_time=0.472, grad_norm=31.262, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.192e-04, train_time=1.740 +[gpub022:0/16] 2024-02-02 18:27:25,497 (trainer:737) INFO: 21epoch:train:12201-12300batch: iter_time=8.847e-05, forward_time=0.295, loss_ctc=52.353, loss_att=62.032, acc=0.742, loss=59.128, backward_time=0.409, grad_norm=33.885, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.192e-04, train_time=1.660 +[gpub022:0/16] 2024-02-02 18:29:48,772 (trainer:737) INFO: 21epoch:train:12301-12400batch: iter_time=8.954e-05, forward_time=0.293, loss_ctc=49.917, loss_att=51.660, acc=0.754, loss=51.137, backward_time=0.406, grad_norm=34.397, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.192e-04, train_time=1.432 +[gpub022:0/16] 2024-02-02 18:32:30,934 (trainer:737) INFO: 21epoch:train:12401-12500batch: iter_time=8.350e-05, forward_time=0.325, loss_ctc=42.126, loss_att=43.910, acc=0.753, loss=43.375, backward_time=0.413, grad_norm=32.545, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.191e-04, train_time=1.621 +[gpub022:0/16] 2024-02-02 18:32:51,065 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub022:0/16] 2024-02-02 18:33:10,225 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 18:33:13,913 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 18:33:13,913 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub022:0/16] 2024-02-02 18:33:13,916 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 18:40:32,474 (trainer:737) INFO: 21epoch:train:12501-12600batch: iter_time=3.104, forward_time=0.292, loss_ctc=49.226, loss_att=46.338, acc=0.736, loss=47.204, backward_time=0.402, grad_norm=35.179, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.191e-04, train_time=4.815 +[gpub022:0/16] 2024-02-02 18:43:20,534 (trainer:737) INFO: 21epoch:train:12601-12700batch: iter_time=8.506e-05, forward_time=0.360, loss_ctc=48.629, loss_att=49.945, acc=0.736, loss=49.551, backward_time=0.441, grad_norm=36.538, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.191e-04, train_time=1.680 +[gpub022:0/16] 2024-02-02 18:46:06,954 (trainer:737) INFO: 21epoch:train:12701-12800batch: iter_time=8.252e-05, forward_time=0.291, loss_ctc=46.936, loss_att=46.679, acc=0.743, loss=46.756, backward_time=0.406, grad_norm=33.226, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.190e-04, train_time=1.664 +[gpub022:0/16] 2024-02-02 18:49:03,476 (trainer:737) 
INFO: 21epoch:train:12801-12900batch: iter_time=8.804e-05, forward_time=0.393, loss_ctc=50.585, loss_att=49.670, acc=0.760, loss=49.945, backward_time=0.430, grad_norm=34.481, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.118, optim0_lr0=2.190e-04, train_time=1.765 +[gpub022:0/16] 2024-02-02 18:51:58,905 (trainer:737) INFO: 21epoch:train:12901-13000batch: iter_time=0.003, forward_time=0.349, loss_ctc=38.794, loss_att=36.719, acc=0.770, loss=37.342, backward_time=0.411, grad_norm=29.410, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.189e-04, train_time=1.754 +[gpub022:0/16] 2024-02-02 18:54:25,929 (trainer:737) INFO: 21epoch:train:13001-13100batch: iter_time=4.718e-04, forward_time=0.320, loss_ctc=41.710, loss_att=40.598, acc=0.755, loss=40.932, backward_time=0.406, grad_norm=31.542, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.189e-04, train_time=1.470 +[gpub022:0/16] 2024-02-02 18:57:16,886 (trainer:737) INFO: 21epoch:train:13101-13200batch: iter_time=8.277e-05, forward_time=0.328, loss_ctc=49.799, loss_att=48.257, acc=0.759, loss=48.719, backward_time=0.459, grad_norm=36.350, clip=100.000, loss_scale=7.892e+33, optim_step_time=0.100, optim0_lr0=2.189e-04, train_time=1.709 +[gpub022:0/16] 2024-02-02 18:59:51,781 (trainer:737) INFO: 21epoch:train:13201-13300batch: iter_time=7.764e-05, forward_time=0.289, loss_ctc=48.161, loss_att=43.784, acc=0.746, loss=45.097, backward_time=0.401, grad_norm=35.131, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.188e-04, train_time=1.549 +[gpub022:0/16] 2024-02-02 19:02:41,189 (trainer:737) INFO: 21epoch:train:13301-13400batch: iter_time=9.021e-05, forward_time=0.352, loss_ctc=52.040, loss_att=52.845, acc=0.711, loss=52.603, backward_time=0.466, grad_norm=37.945, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.188e-04, train_time=1.694 +[gpub022:0/16] 2024-02-02 19:05:16,635 (trainer:737) INFO: 
21epoch:train:13401-13500batch: iter_time=8.209e-05, forward_time=0.316, loss_ctc=47.823, loss_att=55.140, acc=0.741, loss=52.945, backward_time=0.412, grad_norm=32.394, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.188e-04, train_time=1.555 +[gpub022:0/16] 2024-02-02 19:07:54,704 (trainer:737) INFO: 21epoch:train:13501-13600batch: iter_time=8.845e-05, forward_time=0.362, loss_ctc=48.577, loss_att=55.837, acc=0.737, loss=53.659, backward_time=0.416, grad_norm=33.691, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.187e-04, train_time=1.580 +[gpub022:0/16] 2024-02-02 19:09:42,109 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub022:0/16] 2024-02-02 19:10:36,179 (trainer:737) INFO: 21epoch:train:13601-13700batch: iter_time=8.247e-05, forward_time=0.292, loss_ctc=48.235, loss_att=51.225, acc=0.760, loss=50.328, backward_time=0.406, grad_norm=33.115, clip=100.000, loss_scale=8.496e+33, optim_step_time=0.093, optim0_lr0=2.187e-04, train_time=1.615 +[gpub022:0/16] 2024-02-02 19:12:17,207 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub022:0/16] 2024-02-02 19:12:36,325 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 19:12:40,184 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 19:12:40,184 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub022:0/16] 2024-02-02 19:12:40,190 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub022:0/16] 2024-02-02 19:18:24,584 (trainer:737) INFO: 21epoch:train:13701-13800batch: iter_time=3.121, forward_time=0.362, loss_ctc=46.690, loss_att=44.974, acc=0.728, loss=45.489, backward_time=0.411, grad_norm=36.524, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.187e-04, train_time=4.684 +[gpub022:0/16] 2024-02-02 19:20:50,759 (trainer:737) INFO: 21epoch:train:13801-13900batch: iter_time=7.585e-05, forward_time=0.294, loss_ctc=47.945, loss_att=43.304, acc=0.772, loss=44.696, backward_time=0.404, grad_norm=35.267, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.186e-04, train_time=1.462 +[gpub022:0/16] 2024-02-02 19:23:43,688 (trainer:737) INFO: 21epoch:train:13901-14000batch: iter_time=7.962e-05, forward_time=0.337, loss_ctc=46.359, loss_att=47.714, acc=0.747, loss=47.307, backward_time=0.465, grad_norm=32.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.116, optim0_lr0=2.186e-04, train_time=1.729 +[gpub022:0/16] 2024-02-02 19:25:58,472 (trainer:737) 
INFO: 21epoch:train:14001-14100batch: iter_time=8.911e-05, forward_time=0.295, loss_ctc=49.900, loss_att=51.363, acc=0.748, loss=50.924, backward_time=0.408, grad_norm=35.339, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.186e-04, train_time=1.348 +[gpub022:0/16] 2024-02-02 19:28:48,257 (trainer:737) INFO: 21epoch:train:14101-14200batch: iter_time=1.037e-04, forward_time=0.360, loss_ctc=45.141, loss_att=44.268, acc=0.777, loss=44.530, backward_time=0.419, grad_norm=29.361, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.185e-04, train_time=1.698 +[gpub022:0/16] 2024-02-02 19:31:11,613 (trainer:737) INFO: 21epoch:train:14201-14300batch: iter_time=8.648e-05, forward_time=0.290, loss_ctc=40.558, loss_att=36.473, acc=0.776, loss=37.699, backward_time=0.401, grad_norm=29.661, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.185e-04, train_time=1.433 +[gpub022:0/16] 2024-02-02 19:33:33,319 (trainer:737) INFO: 21epoch:train:14301-14400batch: iter_time=9.412e-05, forward_time=0.293, loss_ctc=45.386, loss_att=49.367, acc=0.756, loss=48.173, backward_time=0.405, grad_norm=35.748, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.185e-04, train_time=1.417 +[gpub022:0/16] 2024-02-02 19:36:22,969 (trainer:737) INFO: 21epoch:train:14401-14500batch: iter_time=2.576e-04, forward_time=0.339, loss_ctc=50.649, loss_att=45.694, acc=0.765, loss=47.180, backward_time=0.464, grad_norm=35.381, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.184e-04, train_time=1.696 +[gpub022:0/16] 2024-02-02 19:38:48,534 (trainer:737) INFO: 21epoch:train:14501-14600batch: iter_time=8.970e-05, forward_time=0.289, loss_ctc=47.927, loss_att=43.403, acc=0.745, loss=44.760, backward_time=0.403, grad_norm=34.945, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.184e-04, train_time=1.455 +[gpub022:0/16] 2024-02-02 19:41:05,625 (trainer:737) INFO: 
21epoch:train:14601-14700batch: iter_time=9.207e-05, forward_time=0.292, loss_ctc=46.774, loss_att=52.378, acc=0.740, loss=50.697, backward_time=0.405, grad_norm=30.796, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.184e-04, train_time=1.371 +[gpub022:0/16] 2024-02-02 19:43:58,350 (trainer:737) INFO: 21epoch:train:14701-14800batch: iter_time=4.352e-04, forward_time=0.355, loss_ctc=52.385, loss_att=61.827, acc=0.745, loss=58.995, backward_time=0.446, grad_norm=34.476, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.183e-04, train_time=1.727 +[gpub022:0/16] 2024-02-02 19:46:13,421 (trainer:737) INFO: 21epoch:train:14801-14900batch: iter_time=8.995e-05, forward_time=0.293, loss_ctc=49.567, loss_att=51.160, acc=0.756, loss=50.682, backward_time=0.408, grad_norm=32.178, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.183e-04, train_time=1.350 +[gpub022:0/16] 2024-02-02 19:49:22,455 (trainer:737) INFO: 21epoch:train:14901-15000batch: iter_time=1.002e-04, forward_time=0.356, loss_ctc=41.506, loss_att=43.495, acc=0.752, loss=42.899, backward_time=0.436, grad_norm=30.818, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.183e-04, train_time=1.890 +[gpub022:0/16] 2024-02-02 20:26:47,099 (trainer:343) INFO: 21epoch results: [train] iter_time=0.247, forward_time=0.321, loss_ctc=47.938, loss_att=47.939, acc=0.748, loss=47.938, backward_time=0.420, grad_norm=34.447, clip=100.000, loss_scale=5.558e+33, optim_step_time=0.096, optim0_lr0=2.209e-04, train_time=1.795, time=7 hours, 29 minutes and 11.92 seconds, total_count=345000, gpu_max_cached_mem_GB=41.891, [valid] loss_ctc=41.764, cer_ctc=0.209, loss_att=43.571, acc=0.655, cer=0.321, wer=0.998, loss=43.029, time=37 minutes and 0.61 seconds, total_count=107433, gpu_max_cached_mem_GB=41.891 +[gpub022:0/16] 2024-02-02 20:26:57,139 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub022:0/16] 2024-02-02 
20:26:57,255 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/16epoch.pth +[gpub022:0/16] 2024-02-02 20:26:57,256 (trainer:272) INFO: 22/45epoch started. Estimated time to finish: 1 week, 1 day and 8 hours +[gpub022:0/16] 2024-02-02 20:26:57,265 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub022:0/16] 2024-02-02 20:27:15,321 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub022:0/16] 2024-02-02 20:27:19,022 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub022:0/16] 2024-02-02 20:27:19,022 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub022:0/16] 2024-02-02 20:27:19,025 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. 
+slurmstepd: error: *** STEP 2913929.0 ON gpub022 CANCELLED AT 2024-02-02T20:28:23 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.12.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.12.log new file mode 100644 index 0000000000000000000000000000000000000000..0039276a5604214a03fa5ff80e80f27ef711f48c --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.12.log @@ -0,0 +1,2570 @@ +# Running on gpub010.delta.ncsa.illinois.edu +# Started at Sun Jan 28 15:01:55 CST 2024 +# SLURMD_NODENAME=gpub010 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2904202 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1706648496 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2904202 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[010,013,072,096]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1706475696 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[010,013,072,096]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=1808814 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub010 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# 
SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_9ef50019-41d1-40ae-9a78-a780753c8878 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config 
conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_9ef50019-41d1-40ae-9a78-a780753c8878 +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_9ef50019-41d1-40ae-9a78-a780753c8878 +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_9ef50019-41d1-40ae-9a78-a780753c8878 +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_9ef50019-41d1-40ae-9a78-a780753c8878 +[gpub010:0/16] 2024-01-28 15:05:36,449 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub010:0/16] 2024-01-28 15:05:36,475 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. 
+[gpub010:0/16] 2024-01-28 15:05:36,517 (s2t:464) INFO: Vocabulary size: 50002 +[gpub010:0/16] 2024-01-28 15:05:45,374 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub010:0/16] 2024-01-28 15:05:45,380 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), 
eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, 
elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + 
(conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): 
Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, 
inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + 
(norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, 
out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): 
PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + 
(linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub010:0/16] 2024-01-28 15:05:45,380 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub010:0/16] 2024-01-28 15:05:45,380 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub010:0/16] 2024-01-28 15:05:45,399 (abs_task:1245) INFO: Saving the configuration in 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub010:0/16] 2024-01-28 15:05:50,729 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 15:05:51,655 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 15:05:51,655 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub010:0/16] 2024-01-28 15:05:51,657 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 15:06:03,201 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub010:1808893:1808893 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1808893:1808893 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1808893:1808893 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub010:0/16] 2024-01-28 15:06:08,624 (trainer:284) INFO: 13/45epoch started +[gpub010:0/16] 2024-01-28 15:06:08,669 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-01-28 15:06:26,526 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 15:06:29,890 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 15:06:29,890 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-01-28 15:06:29,893 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub013:1867174:1867174 [0] NCCL INFO cudaDriverVersion 12020 +gpub013:1867174:1867174 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1867174:1867174 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1867174:1867227 [0] NCCL INFO NET/IB : No device found. 
+gpub013:1867174:1867227 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.113<0> [1]hsn0:141.142.145.113<0> +gpub013:1867174:1867227 [0] NCCL INFO Using network Socket +gpub013:1867174:1867227 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub013:1867174:1867227 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub013:1867174:1867227 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub013:1867174:1867227 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub013:1867174:1867227 [0] NCCL INFO Connected all rings +gpub013:1867174:1867227 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/Socket/1 +gpub013:1867174:1867227 [0] NCCL INFO Connected all trees +gpub013:1867174:1867227 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub013:1867174:1867227 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1867174:1867227 [0] NCCL INFO comm 0x16735ff0 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub072:588459:588459 [2] NCCL INFO cudaDriverVersion 12020 +gpub072:588459:588459 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0> +gpub072:588459:588459 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub072:588459:588520 [2] NCCL 
INFO NET/IB : No device found. +gpub072:588459:588520 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.172<0> [1]hsn0:141.142.145.172<0> +gpub072:588459:588520 [2] NCCL INFO Using network Socket +gpub072:588459:588520 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub072:588459:588520 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub072:588459:588520 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub072:588459:588520 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub072:588459:588520 [2] NCCL INFO Connected all rings +gpub072:588459:588520 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub096:577141:577141 [2] NCCL INFO cudaDriverVersion 12020 +gpub096:577141:577141 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:577141:577141 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:577141:577201 [2] NCCL INFO NET/IB : No device found. +gpub096:577141:577201 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.196<0> [1]hsn0:141.142.145.196<0> [2]eth0:fe80::9229:91c2:3352:9f4c%eth0<0> +gpub096:577141:577201 [2] NCCL INFO Using network Socket +gpub096:577141:577201 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub096:577141:577201 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub096:577141:577201 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub096:577141:577201 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub096:577141:577201 [2] NCCL INFO Connected all rings +gpub096:577141:577201 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub013:1867177:1867177 [3] NCCL INFO cudaDriverVersion 12020 +gpub013:1867177:1867177 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1867177:1867177 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1867177:1867226 [3] NCCL INFO NET/IB : No device found. 
+gpub013:1867177:1867226 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.113<0> [1]hsn0:141.142.145.113<0> +gpub013:1867177:1867226 [3] NCCL INFO Using network Socket +gpub013:1867177:1867226 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub013:1867177:1867226 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub013:1867177:1867226 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub013:1867177:1867226 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub013:1867177:1867226 [3] NCCL INFO Connected all rings +gpub013:1867177:1867226 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub072:588459:588520 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub072:588459:588520 [2] NCCL INFO Connected all trees +gpub072:588459:588520 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub072:588459:588520 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub072:588459:588520 [2] NCCL INFO comm 0x1ab85400 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub096:577141:577201 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub096:577141:577201 [2] NCCL INFO Connected all trees +gpub096:577141:577201 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub096:577141:577201 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:577141:577201 [2] NCCL INFO comm 0x14878610 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1867177:1867226 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub013:1867177:1867226 [3] NCCL INFO Connected all trees +gpub013:1867177:1867226 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub013:1867177:1867226 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1867177:1867226 [3] NCCL INFO comm 0x1400c000 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub072:588460:588460 [3] NCCL 
INFO cudaDriverVersion 12020 +gpub072:588460:588460 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0> +gpub072:588460:588460 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub072:588460:588519 [3] NCCL INFO NET/IB : No device found. +gpub072:588460:588519 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.172<0> [1]hsn0:141.142.145.172<0> +gpub072:588460:588519 [3] NCCL INFO Using network Socket +gpub072:588460:588519 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub072:588460:588519 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub072:588460:588519 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub072:588460:588519 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub072:588460:588519 [3] NCCL INFO Connected all rings +gpub072:588460:588519 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub013:1867175:1867175 [1] NCCL INFO cudaDriverVersion 12020 +gpub013:1867175:1867175 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1867175:1867175 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1867175:1867229 [1] NCCL INFO NET/IB : No device found. 
+gpub013:1867175:1867229 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.113<0> [1]hsn0:141.142.145.113<0> +gpub013:1867175:1867229 [1] NCCL INFO Using network Socket +gpub013:1867175:1867229 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub013:1867175:1867229 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub013:1867175:1867229 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub013:1867175:1867229 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub013:1867175:1867229 [1] NCCL INFO Connected all rings +gpub013:1867175:1867229 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/Socket/1 +gpub072:588460:588519 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub072:588460:588519 [3] NCCL INFO Connected all trees +gpub072:588460:588519 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub072:588460:588519 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub072:588460:588519 [3] NCCL INFO comm 0x17ea23b0 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub013:1867175:1867229 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/Socket/1 +gpub013:1867175:1867229 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub013:1867175:1867229 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub013:1867175:1867229 [1] NCCL INFO Connected all trees +gpub013:1867175:1867229 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub013:1867175:1867229 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1867175:1867229 [1] NCCL INFO comm 0x155434a0 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub072:588457:588457 [0] NCCL INFO cudaDriverVersion 12020 +gpub072:588457:588457 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0> +gpub072:588457:588457 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub072:588457:588518 [0] 
NCCL INFO NET/IB : No device found. +gpub072:588457:588518 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.172<0> [1]hsn0:141.142.145.172<0> +gpub072:588457:588518 [0] NCCL INFO Using network Socket +gpub072:588457:588518 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub072:588457:588518 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub072:588457:588518 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub072:588457:588518 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub072:588457:588518 [0] NCCL INFO Connected all rings +gpub072:588457:588518 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/Socket/1 +gpub072:588457:588518 [0] NCCL INFO Connected all trees +gpub072:588457:588518 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub072:588457:588518 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub072:588457:588518 [0] NCCL INFO comm 0xf582490 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1808893:1808947 [0] NCCL INFO NET/IB : No device found. 
+gpub010:1808893:1808947 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.110<0> [1]hsn0:141.142.145.110<0> [2]eth0:fe80::81a7:cd6e:eca1:d6c4%eth0<0> +gpub010:1808893:1808947 [0] NCCL INFO Using network Socket +gpub010:1808893:1808947 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub010:1808893:1808947 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub010:1808893:1808947 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub010:1808893:1808947 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub010:1808893:1808947 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub010:1808893:1808947 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub010:1808893:1808947 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub010:1808893:1808947 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub010:1808893:1808947 [0] NCCL INFO Connected all rings +gpub010:1808893:1808947 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/Socket/1 +gpub010:1808893:1808947 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub010:1808893:1808947 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/Socket/1 +gpub010:1808893:1808947 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub010:1808893:1808947 [0] NCCL INFO Connected all trees +gpub010:1808893:1808947 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:1808893:1808947 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1808893:1808947 [0] NCCL INFO comm 0x1c26c200 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1808896:1808896 [3] NCCL INFO cudaDriverVersion 12020 +gpub010:1808896:1808896 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1808896:1808896 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpub010:1808896:1808949 [3] NCCL INFO NET/IB : No device found. +gpub010:1808896:1808949 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.110<0> [1]hsn0:141.142.145.110<0> [2]eth0:fe80::81a7:cd6e:eca1:d6c4%eth0<0> +gpub010:1808896:1808949 [3] NCCL INFO Using network Socket +gpub010:1808896:1808949 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub010:1808896:1808949 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub010:1808896:1808949 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub010:1808896:1808949 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub010:1808896:1808949 [3] NCCL INFO Connected all rings +gpub010:1808896:1808949 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub010:1808896:1808949 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub010:1808896:1808949 [3] NCCL INFO Connected all trees +gpub010:1808896:1808949 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:1808896:1808949 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1808896:1808949 [3] NCCL INFO comm 0xda4cb10 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub010:1808895:1808895 [2] NCCL INFO cudaDriverVersion 12020 +gpub010:1808895:1808895 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1808895:1808895 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1808895:1808950 [2] NCCL INFO NET/IB : No device found. 
+gpub010:1808895:1808950 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.110<0> [1]hsn0:141.142.145.110<0> [2]eth0:fe80::81a7:cd6e:eca1:d6c4%eth0<0> +gpub010:1808895:1808950 [2] NCCL INFO Using network Socket +gpub010:1808895:1808950 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub010:1808895:1808950 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub010:1808895:1808950 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub010:1808895:1808950 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub010:1808895:1808950 [2] NCCL INFO Connected all rings +gpub010:1808895:1808950 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub010:1808895:1808950 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub010:1808895:1808950 [2] NCCL INFO Connected all trees +gpub010:1808895:1808950 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:1808895:1808950 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1808895:1808950 [2] NCCL INFO comm 0x15436610 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub010:1808894:1808894 [1] NCCL INFO cudaDriverVersion 12020 +gpub010:1808894:1808894 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1808894:1808894 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1808894:1808948 [1] NCCL INFO NET/IB : No device found. 
+gpub010:1808894:1808948 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.110<0> [1]hsn0:141.142.145.110<0> [2]eth0:fe80::81a7:cd6e:eca1:d6c4%eth0<0> +gpub010:1808894:1808948 [1] NCCL INFO Using network Socket +gpub010:1808894:1808948 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub010:1808894:1808948 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub010:1808894:1808948 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub010:1808894:1808948 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub010:1808894:1808948 [1] NCCL INFO Connected all rings +gpub010:1808894:1808948 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub010:1808894:1808948 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub010:1808894:1808948 [1] NCCL INFO Connected all trees +gpub010:1808894:1808948 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:1808894:1808948 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1808894:1808948 [1] NCCL INFO comm 0x1ca388d0 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub096:577142:577142 [3] NCCL INFO cudaDriverVersion 12020 +gpub096:577142:577142 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:577142:577142 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:577142:577200 [3] NCCL INFO NET/IB : No device found. 
+gpub096:577142:577200 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.196<0> [1]hsn0:141.142.145.196<0> [2]eth0:fe80::9229:91c2:3352:9f4c%eth0<0> +gpub096:577142:577200 [3] NCCL INFO Using network Socket +gpub096:577142:577200 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub096:577142:577200 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub096:577142:577200 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub096:577142:577200 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub096:577142:577200 [3] NCCL INFO Connected all rings +gpub096:577142:577200 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub096:577142:577200 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub096:577142:577200 [3] NCCL INFO Connected all trees +gpub096:577142:577200 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub096:577142:577200 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:577142:577200 [3] NCCL INFO comm 0x163a2fa0 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub072:588458:588458 [1] NCCL INFO cudaDriverVersion 12020 +gpub072:588458:588458 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0> +gpub072:588458:588458 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub072:588458:588517 [1] NCCL INFO NET/IB : No device found. 
+gpub072:588458:588517 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.172<0> [1]hsn0:141.142.145.172<0> +gpub072:588458:588517 [1] NCCL INFO Using network Socket +gpub072:588458:588517 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub072:588458:588517 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub072:588458:588517 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub072:588458:588517 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub072:588458:588517 [1] NCCL INFO Connected all rings +gpub072:588458:588517 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/Socket/1 +gpub072:588458:588517 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/Socket/1 +gpub072:588458:588517 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub072:588458:588517 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub072:588458:588517 [1] NCCL INFO Connected all trees +gpub072:588458:588517 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub072:588458:588517 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub072:588458:588517 [1] NCCL INFO comm 0x12e3c790 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub096:577140:577140 [1] NCCL INFO cudaDriverVersion 12020 +gpub096:577140:577140 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:577140:577140 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:577140:577199 [1] NCCL INFO NET/IB : No device found. 
+gpub096:577140:577199 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.196<0> [1]hsn0:141.142.145.196<0> [2]eth0:fe80::9229:91c2:3352:9f4c%eth0<0> +gpub096:577140:577199 [1] NCCL INFO Using network Socket +gpub096:577140:577199 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub096:577140:577199 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub096:577140:577199 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub096:577140:577199 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub096:577140:577199 [1] NCCL INFO Connected all rings +gpub096:577140:577199 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub096:577140:577199 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub096:577140:577199 [1] NCCL INFO Connected all trees +gpub096:577140:577199 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub096:577140:577199 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:577140:577199 [1] NCCL INFO comm 0x1748b460 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub013:1867176:1867176 [2] NCCL INFO cudaDriverVersion 12020 +gpub013:1867176:1867176 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1867176:1867176 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1867176:1867228 [2] NCCL INFO NET/IB : No device found. 
+gpub013:1867176:1867228 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.113<0> [1]hsn0:141.142.145.113<0> +gpub013:1867176:1867228 [2] NCCL INFO Using network Socket +gpub013:1867176:1867228 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub013:1867176:1867228 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub013:1867176:1867228 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub013:1867176:1867228 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub013:1867176:1867228 [2] NCCL INFO Connected all rings +gpub013:1867176:1867228 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub013:1867176:1867228 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub013:1867176:1867228 [2] NCCL INFO Connected all trees +gpub013:1867176:1867228 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub013:1867176:1867228 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1867176:1867228 [2] NCCL INFO comm 0x1579c4f0 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub096:577139:577139 [0] NCCL INFO cudaDriverVersion 12020 +gpub096:577139:577139 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:577139:577139 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:577139:577202 [0] NCCL INFO NET/IB : No device found. 
+gpub096:577139:577202 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.196<0> [1]hsn0:141.142.145.196<0> [2]eth0:fe80::9229:91c2:3352:9f4c%eth0<0> +gpub096:577139:577202 [0] NCCL INFO Using network Socket +gpub096:577139:577202 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub096:577139:577202 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub096:577139:577202 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub096:577139:577202 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub096:577139:577202 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub096:577139:577202 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub096:577139:577202 [0] NCCL INFO Connected all rings +gpub096:577139:577202 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub096:577139:577202 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub096:577139:577202 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/Socket/1 +gpub096:577139:577202 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/Socket/1 +gpub096:577139:577202 [0] NCCL INFO Connected all trees +gpub096:577139:577202 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub096:577139:577202 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:577139:577202 [0] NCCL INFO comm 0x171a4770 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +[gpub010:0/16] 2024-01-28 15:14:27,611 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub010:0/16] 2024-01-28 15:17:37,381 (trainer:737) INFO: 13epoch:train:1-100batch: iter_time=4.851, forward_time=0.604, loss_ctc=66.074, loss_att=62.668, acc=0.690, loss=63.690, backward_time=0.563, grad_norm=38.933, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.119, optim0_lr0=2.886e-04, train_time=6.875 +[gpub010:0/16] 2024-01-28 15:20:43,801 (trainer:737) INFO: 13epoch:train:101-200batch: iter_time=8.525e-04, forward_time=0.569, loss_ctc=51.175, loss_att=54.576, acc=0.704, loss=53.556, backward_time=0.523, grad_norm=31.595, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.111, optim0_lr0=2.886e-04, train_time=1.874 +[gpub010:0/16] 2024-01-28 15:23:49,393 (trainer:737) INFO: 13epoch:train:201-300batch: iter_time=2.730e-04, forward_time=0.485, loss_ctc=69.042, loss_att=61.789, acc=0.704, loss=63.965, backward_time=0.519, grad_norm=43.588, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.102, optim0_lr0=2.885e-04, train_time=1.856 +[gpub010:0/16] 2024-01-28 15:26:52,612 (trainer:737) INFO: 13epoch:train:301-400batch: iter_time=5.821e-04, forward_time=0.535, loss_ctc=68.647, loss_att=62.215, acc=0.726, loss=64.145, backward_time=0.487, grad_norm=35.438, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.104, optim0_lr0=2.884e-04, train_time=1.833 +[gpub010:0/16] 2024-01-28 15:29:41,550 (trainer:737) INFO: 13epoch:train:401-500batch: iter_time=0.001, forward_time=0.489, loss_ctc=69.470, loss_att=58.680, acc=0.690, loss=61.917, backward_time=0.490, grad_norm=39.241, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.103, optim0_lr0=2.883e-04, train_time=1.689 +[gpub010:0/16] 2024-01-28 15:32:42,770 (trainer:737) INFO: 13epoch:train:501-600batch: iter_time=0.001, forward_time=0.512, loss_ctc=45.513, loss_att=40.988, acc=0.726, loss=42.345, backward_time=0.478, grad_norm=29.596, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.109, optim0_lr0=2.882e-04, train_time=1.813 +[gpub010:0/16] 2024-01-28 15:35:44,628 (trainer:737) INFO: 
13epoch:train:601-700batch: iter_time=4.440e-04, forward_time=0.469, loss_ctc=60.484, loss_att=56.909, acc=0.721, loss=57.982, backward_time=0.487, grad_norm=32.677, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.100, optim0_lr0=2.882e-04, train_time=1.818 +[gpub010:0/16] 2024-01-28 15:38:42,906 (trainer:737) INFO: 13epoch:train:701-800batch: iter_time=4.578e-04, forward_time=0.534, loss_ctc=50.878, loss_att=49.465, acc=0.747, loss=49.889, backward_time=0.501, grad_norm=27.035, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.102, optim0_lr0=2.881e-04, train_time=1.782 +[gpub010:0/16] 2024-01-28 15:41:28,009 (trainer:737) INFO: 13epoch:train:801-900batch: iter_time=0.001, forward_time=0.459, loss_ctc=60.589, loss_att=56.385, acc=0.736, loss=57.646, backward_time=0.506, grad_norm=31.863, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.101, optim0_lr0=2.880e-04, train_time=1.651 +[gpub010:0/16] 2024-01-28 15:44:18,228 (trainer:737) INFO: 13epoch:train:901-1000batch: iter_time=5.075e-04, forward_time=0.468, loss_ctc=59.426, loss_att=50.820, acc=0.718, loss=53.402, backward_time=0.472, grad_norm=34.556, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.101, optim0_lr0=2.879e-04, train_time=1.702 +[gpub010:0/16] 2024-01-28 15:47:00,984 (trainer:737) INFO: 13epoch:train:1001-1100batch: iter_time=6.327e-04, forward_time=0.421, loss_ctc=60.368, loss_att=50.436, acc=0.727, loss=53.416, backward_time=0.451, grad_norm=36.210, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.101, optim0_lr0=2.878e-04, train_time=1.628 +[gpub010:0/16] 2024-01-28 15:49:57,810 (trainer:737) INFO: 13epoch:train:1101-1200batch: iter_time=5.785e-04, forward_time=0.440, loss_ctc=62.132, loss_att=56.537, acc=0.736, loss=58.216, backward_time=0.449, grad_norm=32.459, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.099, optim0_lr0=2.878e-04, train_time=1.766 +[gpub010:0/16] 2024-01-28 15:51:40,024 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub010:0/16] 2024-01-28 15:51:58,482 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 15:52:01,868 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 15:52:01,869 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-01-28 15:52:01,934 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 15:58:12,623 (trainer:737) INFO: 13epoch:train:1201-1300batch: iter_time=3.247, forward_time=0.348, loss_ctc=67.618, loss_att=62.085, acc=0.693, loss=63.745, backward_time=0.437, grad_norm=40.280, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.094, optim0_lr0=2.877e-04, train_time=4.950 +[gpub010:0/16] 2024-01-28 16:00:59,169 (trainer:737) INFO: 13epoch:train:1301-1400batch: iter_time=8.079e-05, forward_time=0.317, loss_ctc=52.719, loss_att=57.334, acc=0.697, loss=55.950, backward_time=0.403, grad_norm=33.338, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.091, optim0_lr0=2.876e-04, train_time=1.665 +[gpub010:0/16] 2024-01-28 16:03:28,939 (trainer:737) INFO: 13epoch:train:1401-1500batch: iter_time=8.004e-05, forward_time=0.348, loss_ctc=50.977, loss_att=51.454, acc=0.714, loss=51.311, backward_time=0.454, grad_norm=30.279, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.102, optim0_lr0=2.875e-04, train_time=1.497 +[gpub010:0/16] 2024-01-28 16:05:52,557 (trainer:737) INFO: 
13epoch:train:1501-1600batch: iter_time=8.779e-05, forward_time=0.295, loss_ctc=77.672, loss_att=67.212, acc=0.721, loss=70.350, backward_time=0.408, grad_norm=44.008, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.091, optim0_lr0=2.874e-04, train_time=1.436 +[gpub010:0/16] 2024-01-28 16:08:36,927 (trainer:737) INFO: 13epoch:train:1601-1700batch: iter_time=8.102e-05, forward_time=0.336, loss_ctc=64.719, loss_att=60.719, acc=0.697, loss=61.919, backward_time=0.416, grad_norm=34.556, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.091, optim0_lr0=2.874e-04, train_time=1.643 +[gpub010:0/16] 2024-01-28 16:11:22,966 (trainer:737) INFO: 13epoch:train:1701-1800batch: iter_time=7.758e-05, forward_time=0.349, loss_ctc=56.681, loss_att=45.850, acc=0.729, loss=49.100, backward_time=0.426, grad_norm=33.011, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.095, optim0_lr0=2.873e-04, train_time=1.660 +[gpub010:0/16] 2024-01-28 16:13:58,576 (trainer:737) INFO: 13epoch:train:1801-1900batch: iter_time=8.334e-05, forward_time=0.296, loss_ctc=48.133, loss_att=40.652, acc=0.737, loss=42.896, backward_time=0.404, grad_norm=27.348, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.091, optim0_lr0=2.872e-04, train_time=1.555 +[gpub010:0/16] 2024-01-28 16:16:29,589 (trainer:737) INFO: 13epoch:train:1901-2000batch: iter_time=8.651e-05, forward_time=0.293, loss_ctc=59.730, loss_att=60.809, acc=0.735, loss=60.485, backward_time=0.407, grad_norm=31.160, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.091, optim0_lr0=2.871e-04, train_time=1.510 +[gpub010:0/16] 2024-01-28 16:18:58,142 (trainer:737) INFO: 13epoch:train:2001-2100batch: iter_time=2.748e-04, forward_time=0.379, loss_ctc=45.455, loss_att=49.816, acc=0.738, loss=48.508, backward_time=0.445, grad_norm=25.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.097, optim0_lr0=2.870e-04, train_time=1.484 +[gpub010:0/16] 2024-01-28 16:21:43,971 (trainer:737) INFO: 13epoch:train:2101-2200batch: 
iter_time=9.702e-05, forward_time=0.310, loss_ctc=65.616, loss_att=51.313, acc=0.738, loss=55.604, backward_time=0.412, grad_norm=35.308, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.092, optim0_lr0=2.870e-04, train_time=1.659 +[gpub010:0/16] 2024-01-28 16:24:04,232 (trainer:737) INFO: 13epoch:train:2201-2300batch: iter_time=8.561e-05, forward_time=0.289, loss_ctc=56.296, loss_att=50.849, acc=0.711, loss=52.483, backward_time=0.400, grad_norm=32.716, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.091, optim0_lr0=2.869e-04, train_time=1.402 +[gpub010:0/16] 2024-01-28 16:26:37,722 (trainer:737) INFO: 13epoch:train:2301-2400batch: iter_time=3.296e-04, forward_time=0.365, loss_ctc=55.955, loss_att=47.137, acc=0.747, loss=49.783, backward_time=0.441, grad_norm=29.908, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.098, optim0_lr0=2.868e-04, train_time=1.535 +[gpub010:0/16] 2024-01-28 16:29:31,460 (trainer:737) INFO: 13epoch:train:2401-2500batch: iter_time=8.652e-05, forward_time=0.334, loss_ctc=66.936, loss_att=64.730, acc=0.713, loss=65.392, backward_time=0.414, grad_norm=38.841, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.093, optim0_lr0=2.867e-04, train_time=1.736 +[gpub010:0/16] 2024-01-28 16:29:51,618 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub010:0/16] 2024-01-28 16:30:09,759 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 16:30:13,402 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 16:30:13,402 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-01-28 16:30:13,405 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 16:40:41,895 (trainer:737) INFO: 13epoch:train:2501-2600batch: iter_time=3.211, forward_time=0.285, loss_ctc=61.443, loss_att=59.119, acc=0.698, loss=59.816, backward_time=0.390, grad_norm=36.200, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.091, optim0_lr0=2.867e-04, train_time=6.705 +[gpub010:0/16] 2024-01-28 16:43:44,359 (trainer:737) INFO: 13epoch:train:2601-2700batch: iter_time=8.341e-05, forward_time=0.316, loss_ctc=49.017, loss_att=53.498, acc=0.705, loss=52.154, backward_time=0.415, grad_norm=32.093, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.092, optim0_lr0=2.866e-04, train_time=1.824 +[gpub010:0/16] 2024-01-28 16:46:46,077 (trainer:737) INFO: 13epoch:train:2701-2800batch: iter_time=8.554e-05, forward_time=0.376, loss_ctc=66.238, loss_att=59.018, acc=0.712, loss=61.184, backward_time=0.427, grad_norm=41.129, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.095, optim0_lr0=2.865e-04, train_time=1.817 +[gpub010:0/16] 2024-01-28 16:49:39,727 (trainer:737) INFO: 
13epoch:train:2801-2900batch: iter_time=8.474e-05, forward_time=0.294, loss_ctc=64.100, loss_att=58.043, acc=0.736, loss=59.860, backward_time=0.405, grad_norm=34.174, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.091, optim0_lr0=2.864e-04, train_time=1.735 +[gpub010:0/16] 2024-01-28 16:52:31,139 (trainer:737) INFO: 13epoch:train:2901-3000batch: iter_time=9.119e-05, forward_time=0.291, loss_ctc=65.799, loss_att=55.937, acc=0.699, loss=58.895, backward_time=0.400, grad_norm=36.389, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.091, optim0_lr0=2.863e-04, train_time=1.715 +[gpub010:0/16] 2024-01-28 16:55:21,943 (trainer:737) INFO: 13epoch:train:3001-3100batch: iter_time=8.673e-05, forward_time=0.335, loss_ctc=44.791, loss_att=39.977, acc=0.734, loss=41.421, backward_time=0.413, grad_norm=27.763, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.095, optim0_lr0=2.863e-04, train_time=1.708 +[gpub010:0/16] 2024-01-28 16:58:14,693 (trainer:737) INFO: 13epoch:train:3101-3200batch: iter_time=2.071e-04, forward_time=0.372, loss_ctc=59.232, loss_att=55.357, acc=0.728, loss=56.520, backward_time=0.432, grad_norm=31.202, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.095, optim0_lr0=2.862e-04, train_time=1.727 +[gpub010:0/16] 2024-01-28 17:00:49,258 (trainer:737) INFO: 13epoch:train:3201-3300batch: iter_time=8.887e-05, forward_time=0.289, loss_ctc=49.851, loss_att=47.859, acc=0.753, loss=48.456, backward_time=0.401, grad_norm=26.097, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.091, optim0_lr0=2.861e-04, train_time=1.545 +[gpub010:0/16] 2024-01-28 17:03:30,356 (trainer:737) INFO: 13epoch:train:3301-3400batch: iter_time=8.600e-05, forward_time=0.291, loss_ctc=58.625, loss_att=56.177, acc=0.735, loss=56.912, backward_time=0.401, grad_norm=30.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.091, optim0_lr0=2.860e-04, train_time=1.610 +[gpub010:0/16] 2024-01-28 17:06:24,568 (trainer:737) INFO: 13epoch:train:3401-3500batch: 
iter_time=9.140e-05, forward_time=0.322, loss_ctc=57.637, loss_att=49.336, acc=0.725, loss=51.827, backward_time=0.427, grad_norm=34.423, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.096, optim0_lr0=2.859e-04, train_time=1.742 +[gpub010:0/16] 2024-01-28 17:09:14,548 (trainer:737) INFO: 13epoch:train:3501-3600batch: iter_time=8.967e-04, forward_time=0.369, loss_ctc=58.227, loss_att=49.467, acc=0.730, loss=52.095, backward_time=0.429, grad_norm=31.897, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.097, optim0_lr0=2.859e-04, train_time=1.700 +[gpub010:0/16] 2024-01-28 17:11:51,647 (trainer:737) INFO: 13epoch:train:3601-3700batch: iter_time=8.451e-05, forward_time=0.291, loss_ctc=60.345, loss_att=55.535, acc=0.738, loss=56.978, backward_time=0.401, grad_norm=32.072, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.090, optim0_lr0=2.858e-04, train_time=1.571 +[gpub010:0/16] 2024-01-28 17:13:29,320 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub010:0/16] 2024-01-28 17:13:47,769 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 17:13:51,250 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 17:13:51,250 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-01-28 17:13:51,312 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 17:23:36,571 
(trainer:737) INFO: 13epoch:train:3701-3800batch: iter_time=3.280, forward_time=0.333, loss_ctc=66.105, loss_att=60.828, acc=0.699, loss=62.411, backward_time=0.404, grad_norm=38.363, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.091, optim0_lr0=2.857e-04, train_time=7.049 +[gpub010:0/16] 2024-01-28 17:27:57,030 (trainer:737) INFO: 13epoch:train:3801-3900batch: iter_time=8.881e-05, forward_time=0.356, loss_ctc=51.281, loss_att=55.376, acc=0.705, loss=54.147, backward_time=0.420, grad_norm=31.643, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.094, optim0_lr0=2.856e-04, train_time=2.604 +[gpub010:0/16] 2024-01-28 17:33:47,588 (trainer:737) INFO: 13epoch:train:3901-4000batch: iter_time=8.507e-05, forward_time=0.289, loss_ctc=50.508, loss_att=50.832, acc=0.718, loss=50.735, backward_time=0.391, grad_norm=29.561, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.090, optim0_lr0=2.856e-04, train_time=3.506 +[gpub010:0/16] 2024-01-28 17:40:34,472 (trainer:737) INFO: 13epoch:train:4001-4100batch: iter_time=8.601e-05, forward_time=0.291, loss_ctc=77.604, loss_att=66.133, acc=0.724, loss=69.574, backward_time=0.396, grad_norm=40.847, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.090, optim0_lr0=2.855e-04, train_time=4.069 +[gpub010:0/16] 2024-01-28 17:46:55,997 (trainer:737) INFO: 13epoch:train:4101-4200batch: iter_time=8.473e-05, forward_time=0.315, loss_ctc=63.386, loss_att=60.644, acc=0.696, loss=61.467, backward_time=0.426, grad_norm=34.317, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.100, optim0_lr0=2.854e-04, train_time=3.815 +[gpub010:0/16] 2024-01-28 17:55:07,801 (trainer:737) INFO: 13epoch:train:4201-4300batch: iter_time=9.727e-05, forward_time=0.331, loss_ctc=54.213, loss_att=45.218, acc=0.733, loss=47.917, backward_time=0.426, grad_norm=31.697, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.094, optim0_lr0=2.853e-04, train_time=4.917 +[gpub010:0/16] 2024-01-28 17:58:03,480 (trainer:737) INFO: 
13epoch:train:4301-4400batch: iter_time=9.173e-05, forward_time=0.286, loss_ctc=47.366, loss_att=39.626, acc=0.741, loss=41.948, backward_time=0.397, grad_norm=26.445, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.852e-04, train_time=1.757 +[gpub010:0/16] 2024-01-28 18:00:51,932 (trainer:737) INFO: 13epoch:train:4401-4500batch: iter_time=8.193e-05, forward_time=0.311, loss_ctc=59.164, loss_att=59.782, acc=0.737, loss=59.596, backward_time=0.404, grad_norm=29.540, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.852e-04, train_time=1.684 +[gpub010:0/16] 2024-01-28 18:03:38,150 (trainer:737) INFO: 13epoch:train:4501-4600batch: iter_time=8.421e-05, forward_time=0.327, loss_ctc=44.225, loss_att=48.881, acc=0.740, loss=47.484, backward_time=0.415, grad_norm=25.317, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.094, optim0_lr0=2.851e-04, train_time=1.662 +[gpub010:0/16] 2024-01-28 18:06:25,915 (trainer:737) INFO: 13epoch:train:4601-4700batch: iter_time=8.544e-05, forward_time=0.340, loss_ctc=63.717, loss_att=50.174, acc=0.743, loss=54.237, backward_time=0.422, grad_norm=33.435, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.095, optim0_lr0=2.850e-04, train_time=1.677 +[gpub010:0/16] 2024-01-28 18:09:23,270 (trainer:737) INFO: 13epoch:train:4701-4800batch: iter_time=7.967e-05, forward_time=0.330, loss_ctc=55.196, loss_att=50.683, acc=0.713, loss=52.037, backward_time=0.431, grad_norm=31.583, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.849e-04, train_time=1.773 +[gpub010:0/16] 2024-01-28 18:12:03,701 (trainer:737) INFO: 13epoch:train:4801-4900batch: iter_time=8.647e-05, forward_time=0.291, loss_ctc=54.725, loss_att=45.863, acc=0.753, loss=48.522, backward_time=0.402, grad_norm=29.294, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.849e-04, train_time=1.604 +[gpub010:0/16] 2024-01-28 18:14:52,577 (trainer:737) INFO: 13epoch:train:4901-5000batch: 
iter_time=9.025e-05, forward_time=0.332, loss_ctc=65.865, loss_att=63.876, acc=0.716, loss=64.473, backward_time=0.418, grad_norm=37.958, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.092, optim0_lr0=2.848e-04, train_time=1.688 +[gpub010:0/16] 2024-01-28 18:15:12,734 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub010:0/16] 2024-01-28 18:15:31,399 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 18:15:34,874 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 18:15:34,874 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-01-28 18:15:34,939 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 18:22:56,838 (trainer:737) INFO: 13epoch:train:5001-5100batch: iter_time=3.312, forward_time=0.355, loss_ctc=60.765, loss_att=59.960, acc=0.691, loss=60.202, backward_time=0.410, grad_norm=37.070, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.094, optim0_lr0=2.847e-04, train_time=4.843 +[gpub010:0/16] 2024-01-28 18:25:18,840 (trainer:737) INFO: 13epoch:train:5101-5200batch: iter_time=8.866e-05, forward_time=0.288, loss_ctc=48.148, loss_att=55.156, acc=0.698, loss=53.054, backward_time=0.400, grad_norm=32.402, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.846e-04, train_time=1.420 +[gpub010:0/16] 2024-01-28 18:27:53,088 
(trainer:737) INFO: 13epoch:train:5201-5300batch: iter_time=8.253e-05, forward_time=0.328, loss_ctc=64.990, loss_att=58.262, acc=0.710, loss=60.281, backward_time=0.421, grad_norm=37.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.092, optim0_lr0=2.846e-04, train_time=1.542 +[gpub010:0/16] 2024-01-28 18:30:43,332 (trainer:737) INFO: 13epoch:train:5301-5400batch: iter_time=8.722e-05, forward_time=0.384, loss_ctc=62.843, loss_att=58.152, acc=0.721, loss=59.559, backward_time=0.426, grad_norm=34.948, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.094, optim0_lr0=2.845e-04, train_time=1.702 +[gpub010:0/16] 2024-01-28 18:32:59,262 (trainer:737) INFO: 13epoch:train:5401-5500batch: iter_time=8.580e-05, forward_time=0.291, loss_ctc=64.003, loss_att=55.935, acc=0.694, loss=58.356, backward_time=0.402, grad_norm=38.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.844e-04, train_time=1.359 +[gpub010:0/16] 2024-01-28 18:35:10,903 (trainer:737) INFO: 13epoch:train:5501-5600batch: iter_time=8.737e-05, forward_time=0.287, loss_ctc=44.220, loss_att=39.934, acc=0.728, loss=41.220, backward_time=0.399, grad_norm=27.795, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.843e-04, train_time=1.316 +[gpub010:0/16] 2024-01-28 18:38:06,073 (trainer:737) INFO: 13epoch:train:5601-5700batch: iter_time=9.770e-05, forward_time=0.387, loss_ctc=58.187, loss_att=56.352, acc=0.709, loss=56.903, backward_time=0.436, grad_norm=32.360, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.099, optim0_lr0=2.842e-04, train_time=1.751 +[gpub010:0/16] 2024-01-28 18:40:31,217 (trainer:737) INFO: 13epoch:train:5701-5800batch: iter_time=9.398e-05, forward_time=0.289, loss_ctc=48.976, loss_att=48.317, acc=0.749, loss=48.515, backward_time=0.401, grad_norm=27.045, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.842e-04, train_time=1.452 +[gpub010:0/16] 2024-01-28 18:42:48,702 (trainer:737) INFO: 
13epoch:train:5801-5900batch: iter_time=9.118e-05, forward_time=0.291, loss_ctc=57.098, loss_att=56.026, acc=0.727, loss=56.348, backward_time=0.404, grad_norm=32.225, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.841e-04, train_time=1.375 +[gpub010:0/16] 2024-01-28 18:45:33,097 (trainer:737) INFO: 13epoch:train:5901-6000batch: iter_time=9.025e-05, forward_time=0.288, loss_ctc=56.809, loss_att=49.551, acc=0.715, loss=51.728, backward_time=0.398, grad_norm=33.587, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.091, optim0_lr0=2.840e-04, train_time=1.644 +[gpub010:0/16] 2024-01-28 18:48:12,598 (trainer:737) INFO: 13epoch:train:6001-6100batch: iter_time=1.014e-04, forward_time=0.370, loss_ctc=57.648, loss_att=47.620, acc=0.732, loss=50.628, backward_time=0.508, grad_norm=31.340, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.104, optim0_lr0=2.839e-04, train_time=1.593 +[gpub010:0/16] 2024-01-28 18:50:33,813 (trainer:737) INFO: 13epoch:train:6101-6200batch: iter_time=8.781e-05, forward_time=0.292, loss_ctc=59.354, loss_att=54.881, acc=0.734, loss=56.223, backward_time=0.402, grad_norm=30.647, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.839e-04, train_time=1.413 +[gpub010:0/16] 2024-01-28 18:52:04,542 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub010:0/16] 2024-01-28 18:52:23,130 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 18:52:26,593 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 18:52:26,593 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-01-28 18:52:26,600 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 18:58:38,938 (trainer:737) INFO: 13epoch:train:6201-6300batch: iter_time=3.340, forward_time=0.292, loss_ctc=65.626, loss_att=59.977, acc=0.694, loss=61.672, backward_time=0.403, grad_norm=38.685, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.838e-04, train_time=4.851 +[gpub010:0/16] 2024-01-28 19:01:14,746 (trainer:737) INFO: 13epoch:train:6301-6400batch: iter_time=8.035e-05, forward_time=0.390, loss_ctc=49.747, loss_att=56.351, acc=0.695, loss=54.370, backward_time=0.444, grad_norm=31.860, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.099, optim0_lr0=2.837e-04, train_time=1.558 +[gpub010:0/16] 2024-01-28 19:03:34,199 (trainer:737) INFO: 13epoch:train:6401-6500batch: iter_time=7.810e-05, forward_time=0.298, loss_ctc=49.942, loss_att=49.631, acc=0.721, loss=49.724, backward_time=0.401, grad_norm=28.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.836e-04, train_time=1.393 +[gpub010:0/16] 2024-01-28 19:06:17,820 (trainer:737) INFO: 
13epoch:train:6501-6600batch: iter_time=9.536e-05, forward_time=0.294, loss_ctc=75.546, loss_att=65.480, acc=0.719, loss=68.500, backward_time=0.406, grad_norm=40.543, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.836e-04, train_time=1.637 +[gpub010:0/16] 2024-01-28 19:08:41,046 (trainer:737) INFO: 13epoch:train:6601-6700batch: iter_time=9.188e-05, forward_time=0.292, loss_ctc=62.715, loss_att=58.455, acc=0.692, loss=59.733, backward_time=0.405, grad_norm=34.122, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.835e-04, train_time=1.432 +[gpub010:0/16] 2024-01-28 19:11:20,122 (trainer:737) INFO: 13epoch:train:6701-6800batch: iter_time=9.489e-05, forward_time=0.427, loss_ctc=54.200, loss_att=44.529, acc=0.731, loss=47.430, backward_time=0.434, grad_norm=32.539, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.096, optim0_lr0=2.834e-04, train_time=1.591 +[gpub010:0/16] 2024-01-28 19:13:58,130 (trainer:737) INFO: 13epoch:train:6801-6900batch: iter_time=9.027e-05, forward_time=0.286, loss_ctc=47.294, loss_att=40.296, acc=0.733, loss=42.396, backward_time=0.396, grad_norm=29.464, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.833e-04, train_time=1.578 +[gpub010:0/16] 2024-01-28 19:16:20,485 (trainer:737) INFO: 13epoch:train:6901-7000batch: iter_time=8.516e-05, forward_time=0.292, loss_ctc=58.198, loss_att=59.947, acc=0.720, loss=59.423, backward_time=0.404, grad_norm=30.001, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.833e-04, train_time=1.425 +[gpub010:0/16] 2024-01-28 19:18:39,276 (trainer:737) INFO: 13epoch:train:7001-7100batch: iter_time=8.240e-05, forward_time=0.288, loss_ctc=44.247, loss_att=49.019, acc=0.733, loss=47.587, backward_time=0.401, grad_norm=25.816, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.832e-04, train_time=1.388 +[gpub010:0/16] 2024-01-28 19:21:21,454 (trainer:737) INFO: 13epoch:train:7101-7200batch: 
iter_time=8.652e-05, forward_time=0.391, loss_ctc=63.513, loss_att=50.183, acc=0.736, loss=54.182, backward_time=0.425, grad_norm=34.335, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.098, optim0_lr0=2.831e-04, train_time=1.621 +[gpub010:0/16] 2024-01-28 19:23:58,345 (trainer:737) INFO: 13epoch:train:7201-7300batch: iter_time=8.422e-05, forward_time=0.289, loss_ctc=54.810, loss_att=49.236, acc=0.708, loss=50.908, backward_time=0.398, grad_norm=33.155, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.830e-04, train_time=1.568 +[gpub010:0/16] 2024-01-28 19:26:12,968 (trainer:737) INFO: 13epoch:train:7301-7400batch: iter_time=8.096e-05, forward_time=0.291, loss_ctc=54.383, loss_att=46.342, acc=0.749, loss=48.754, backward_time=0.404, grad_norm=29.019, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.830e-04, train_time=1.347 +[gpub010:0/16] 2024-01-28 19:28:35,912 (trainer:737) INFO: 13epoch:train:7401-7500batch: iter_time=8.245e-05, forward_time=0.293, loss_ctc=65.281, loss_att=63.179, acc=0.709, loss=63.809, backward_time=0.406, grad_norm=36.541, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.829e-04, train_time=1.429 +[gpub010:0/16] 2024-01-28 19:28:55,940 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub010:0/16] 2024-01-28 19:29:14,522 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 19:29:18,045 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 19:29:18,045 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-01-28 19:29:18,085 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 19:36:12,250 (trainer:737) INFO: 13epoch:train:7501-7600batch: iter_time=3.149, forward_time=0.349, loss_ctc=60.377, loss_att=56.856, acc=0.700, loss=57.912, backward_time=0.409, grad_norm=34.993, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.093, optim0_lr0=2.828e-04, train_time=4.562 +[gpub010:0/16] 2024-01-28 19:38:37,210 (trainer:737) INFO: 13epoch:train:7601-7700batch: iter_time=7.672e-05, forward_time=0.289, loss_ctc=47.418, loss_att=53.219, acc=0.702, loss=51.479, backward_time=0.401, grad_norm=29.977, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.827e-04, train_time=1.451 +[gpub010:0/16] 2024-01-28 19:41:20,326 (trainer:737) INFO: 13epoch:train:7701-7800batch: iter_time=8.439e-05, forward_time=0.300, loss_ctc=65.150, loss_att=57.362, acc=0.714, loss=59.698, backward_time=0.416, grad_norm=36.886, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.093, optim0_lr0=2.827e-04, train_time=1.631 +[gpub010:0/16] 2024-01-28 19:43:52,087 (trainer:737) INFO: 
13epoch:train:7801-7900batch: iter_time=9.649e-05, forward_time=0.362, loss_ctc=62.873, loss_att=56.750, acc=0.726, loss=58.587, backward_time=0.447, grad_norm=35.601, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.098, optim0_lr0=2.826e-04, train_time=1.517 +[gpub010:0/16] 2024-01-28 19:46:24,625 (trainer:737) INFO: 13epoch:train:7901-8000batch: iter_time=8.346e-05, forward_time=0.288, loss_ctc=63.200, loss_att=54.878, acc=0.698, loss=57.375, backward_time=0.400, grad_norm=37.831, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.091, optim0_lr0=2.825e-04, train_time=1.525 +[gpub010:0/16] 2024-01-28 19:48:56,551 (trainer:737) INFO: 13epoch:train:8001-8100batch: iter_time=8.925e-05, forward_time=0.285, loss_ctc=43.991, loss_att=38.660, acc=0.732, loss=40.259, backward_time=0.396, grad_norm=27.311, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.824e-04, train_time=1.520 +[gpub010:0/16] 2024-01-28 19:51:30,001 (trainer:737) INFO: 13epoch:train:8101-8200batch: iter_time=8.087e-05, forward_time=0.376, loss_ctc=57.917, loss_att=55.459, acc=0.715, loss=56.196, backward_time=0.448, grad_norm=31.065, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.098, optim0_lr0=2.824e-04, train_time=1.534 +[gpub010:0/16] 2024-01-28 19:54:03,511 (trainer:737) INFO: 13epoch:train:8201-8300batch: iter_time=8.135e-05, forward_time=0.289, loss_ctc=49.255, loss_att=47.442, acc=0.750, loss=47.986, backward_time=0.401, grad_norm=26.428, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.823e-04, train_time=1.535 +[gpub010:0/16] 2024-01-28 19:56:28,573 (trainer:737) INFO: 13epoch:train:8301-8400batch: iter_time=8.062e-05, forward_time=0.291, loss_ctc=56.948, loss_att=55.365, acc=0.729, loss=55.840, backward_time=0.402, grad_norm=29.898, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.822e-04, train_time=1.448 +[gpub010:0/16] 2024-01-28 19:59:14,672 (trainer:737) INFO: 13epoch:train:8401-8500batch: 
iter_time=9.734e-05, forward_time=0.343, loss_ctc=56.655, loss_att=48.997, acc=0.717, loss=51.294, backward_time=0.437, grad_norm=33.028, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.097, optim0_lr0=2.821e-04, train_time=1.663 +[gpub010:0/16] 2024-01-28 20:01:41,223 (trainer:737) INFO: 13epoch:train:8501-8600batch: iter_time=8.348e-05, forward_time=0.291, loss_ctc=58.178, loss_att=47.280, acc=0.734, loss=50.549, backward_time=0.403, grad_norm=33.983, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.821e-04, train_time=1.465 +[gpub010:0/16] 2024-01-28 20:04:13,744 (trainer:737) INFO: 13epoch:train:8601-8700batch: iter_time=8.084e-05, forward_time=0.291, loss_ctc=59.326, loss_att=54.920, acc=0.736, loss=56.242, backward_time=0.404, grad_norm=33.104, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.820e-04, train_time=1.525 +[gpub010:0/16] 2024-01-28 20:05:44,451 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub010:0/16] 2024-01-28 20:06:03,300 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 20:06:07,148 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 20:06:07,148 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-01-28 20:06:07,151 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 20:12:24,469 
(trainer:737) INFO: 13epoch:train:8701-8800batch: iter_time=3.440, forward_time=0.364, loss_ctc=64.510, loss_att=59.058, acc=0.696, loss=60.693, backward_time=0.415, grad_norm=38.870, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.093, optim0_lr0=2.819e-04, train_time=4.907 +[gpub010:0/16] 2024-01-28 20:14:53,350 (trainer:737) INFO: 13epoch:train:8801-8900batch: iter_time=8.183e-05, forward_time=0.287, loss_ctc=49.298, loss_att=55.333, acc=0.697, loss=53.522, backward_time=0.399, grad_norm=32.629, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.818e-04, train_time=1.489 +[gpub010:0/16] 2024-01-28 20:17:06,602 (trainer:737) INFO: 13epoch:train:8901-9000batch: iter_time=7.615e-05, forward_time=0.288, loss_ctc=49.567, loss_att=49.313, acc=0.724, loss=49.389, backward_time=0.402, grad_norm=28.814, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.818e-04, train_time=1.332 +[gpub010:0/16] 2024-01-28 20:19:58,630 (trainer:737) INFO: 13epoch:train:9001-9100batch: iter_time=8.349e-05, forward_time=0.336, loss_ctc=74.111, loss_att=64.901, acc=0.721, loss=67.664, backward_time=0.467, grad_norm=41.567, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.098, optim0_lr0=2.817e-04, train_time=1.720 +[gpub010:0/16] 2024-01-28 20:22:10,286 (trainer:737) INFO: 13epoch:train:9101-9200batch: iter_time=7.856e-05, forward_time=0.292, loss_ctc=61.945, loss_att=57.953, acc=0.694, loss=59.151, backward_time=0.404, grad_norm=35.395, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.816e-04, train_time=1.316 +[gpub010:0/16] 2024-01-28 20:24:45,565 (trainer:737) INFO: 13epoch:train:9201-9300batch: iter_time=7.968e-05, forward_time=0.288, loss_ctc=53.822, loss_att=43.706, acc=0.732, loss=46.741, backward_time=0.400, grad_norm=32.851, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.815e-04, train_time=1.553 +[gpub010:0/16] 2024-01-28 20:27:34,441 (trainer:737) INFO: 
13epoch:train:9301-9400batch: iter_time=8.252e-05, forward_time=0.286, loss_ctc=47.383, loss_att=40.069, acc=0.736, loss=42.263, backward_time=0.396, grad_norm=27.698, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.815e-04, train_time=1.689 +[gpub010:0/16] 2024-01-28 20:30:07,537 (trainer:737) INFO: 13epoch:train:9401-9500batch: iter_time=8.056e-05, forward_time=0.392, loss_ctc=58.017, loss_att=59.409, acc=0.723, loss=58.992, backward_time=0.427, grad_norm=29.873, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.104, optim0_lr0=2.814e-04, train_time=1.530 +[gpub010:0/16] 2024-01-28 20:32:39,637 (trainer:737) INFO: 13epoch:train:9501-9600batch: iter_time=8.092e-05, forward_time=0.287, loss_ctc=43.701, loss_att=48.446, acc=0.735, loss=47.022, backward_time=0.398, grad_norm=25.348, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.813e-04, train_time=1.521 +[gpub010:0/16] 2024-01-28 20:35:02,834 (trainer:737) INFO: 13epoch:train:9601-9700batch: iter_time=8.068e-05, forward_time=0.290, loss_ctc=62.641, loss_att=49.762, acc=0.737, loss=53.626, backward_time=0.402, grad_norm=33.173, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.812e-04, train_time=1.432 +[gpub010:0/16] 2024-01-28 20:37:37,387 (trainer:737) INFO: 13epoch:train:9701-9800batch: iter_time=7.859e-05, forward_time=0.288, loss_ctc=54.229, loss_att=48.806, acc=0.711, loss=50.433, backward_time=0.400, grad_norm=32.751, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.812e-04, train_time=1.545 +[gpub010:0/16] 2024-01-28 20:40:31,989 (trainer:737) INFO: 13epoch:train:9801-9900batch: iter_time=8.788e-05, forward_time=0.392, loss_ctc=54.367, loss_att=46.064, acc=0.751, loss=48.555, backward_time=0.442, grad_norm=28.987, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.101, optim0_lr0=2.811e-04, train_time=1.746 +[gpub010:0/16] 2024-01-28 20:43:09,116 (trainer:737) INFO: 13epoch:train:9901-10000batch: 
iter_time=7.891e-05, forward_time=0.311, loss_ctc=64.899, loss_att=63.057, acc=0.711, loss=63.609, backward_time=0.403, grad_norm=37.033, clip=100.000, loss_scale=1.298e+33, optim_step_time=0.091, optim0_lr0=2.810e-04, train_time=1.571 +[gpub010:0/16] 2024-01-28 20:43:29,196 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub010:0/16] 2024-01-28 20:43:47,694 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 20:43:51,252 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 20:43:51,252 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-01-28 20:43:51,318 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 20:50:57,584 (trainer:737) INFO: 13epoch:train:10001-10100batch: iter_time=3.201, forward_time=0.418, loss_ctc=59.710, loss_att=56.312, acc=0.701, loss=57.331, backward_time=0.421, grad_norm=36.433, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.809e-04, train_time=4.684 +[gpub010:0/16] 2024-01-28 20:53:36,773 (trainer:737) INFO: 13epoch:train:10101-10200batch: iter_time=8.088e-05, forward_time=0.288, loss_ctc=47.401, loss_att=52.140, acc=0.706, loss=50.718, backward_time=0.400, grad_norm=30.501, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.809e-04, train_time=1.591 +[gpub010:0/16] 2024-01-28 20:56:08,764 
(trainer:737) INFO: 13epoch:train:10201-10300batch: iter_time=8.133e-05, forward_time=0.402, loss_ctc=63.905, loss_att=56.180, acc=0.717, loss=58.498, backward_time=0.427, grad_norm=35.701, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=2.808e-04, train_time=1.520 +[gpub010:0/16] 2024-01-28 20:58:43,140 (trainer:737) INFO: 13epoch:train:10301-10400batch: iter_time=9.005e-05, forward_time=0.293, loss_ctc=62.421, loss_att=56.155, acc=0.727, loss=58.035, backward_time=0.406, grad_norm=35.034, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.807e-04, train_time=1.544 +[gpub010:0/16] 2024-01-28 21:01:12,040 (trainer:737) INFO: 13epoch:train:10401-10500batch: iter_time=8.408e-05, forward_time=0.289, loss_ctc=62.734, loss_att=53.909, acc=0.701, loss=56.557, backward_time=0.400, grad_norm=38.108, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.806e-04, train_time=1.489 +[gpub010:0/16] 2024-01-28 21:03:37,961 (trainer:737) INFO: 13epoch:train:10501-10600batch: iter_time=8.048e-05, forward_time=0.345, loss_ctc=43.532, loss_att=38.337, acc=0.733, loss=39.895, backward_time=0.441, grad_norm=28.004, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=2.806e-04, train_time=1.459 +[gpub010:0/16] 2024-01-28 21:06:18,705 (trainer:737) INFO: 13epoch:train:10601-10700batch: iter_time=8.538e-05, forward_time=0.305, loss_ctc=57.400, loss_att=54.596, acc=0.717, loss=55.437, backward_time=0.410, grad_norm=31.254, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.805e-04, train_time=1.607 +[gpub010:0/16] 2024-01-28 21:08:30,322 (trainer:737) INFO: 13epoch:train:10701-10800batch: iter_time=9.212e-05, forward_time=0.289, loss_ctc=48.736, loss_att=47.181, acc=0.752, loss=47.648, backward_time=0.403, grad_norm=27.343, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.804e-04, train_time=1.316 +[gpub010:0/16] 2024-01-28 21:11:14,561 (trainer:737) INFO: 
13epoch:train:10801-10900batch: iter_time=8.586e-05, forward_time=0.290, loss_ctc=56.227, loss_att=54.946, acc=0.730, loss=55.331, backward_time=0.403, grad_norm=30.564, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.803e-04, train_time=1.642 +[gpub010:0/16] 2024-01-28 21:13:46,652 (trainer:737) INFO: 13epoch:train:10901-11000batch: iter_time=7.681e-04, forward_time=0.397, loss_ctc=55.368, loss_att=48.128, acc=0.720, loss=50.300, backward_time=0.468, grad_norm=31.629, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.803e-04, train_time=1.521 +[gpub010:0/16] 2024-01-28 21:16:28,624 (trainer:737) INFO: 13epoch:train:11001-11100batch: iter_time=8.654e-05, forward_time=0.309, loss_ctc=56.699, loss_att=46.438, acc=0.737, loss=49.516, backward_time=0.403, grad_norm=31.357, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.802e-04, train_time=1.620 +[gpub010:0/16] 2024-01-28 21:18:55,852 (trainer:737) INFO: 13epoch:train:11101-11200batch: iter_time=8.510e-05, forward_time=0.292, loss_ctc=58.723, loss_att=54.597, acc=0.737, loss=55.835, backward_time=0.404, grad_norm=31.190, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.801e-04, train_time=1.472 +[gpub010:0/16] 2024-01-28 21:20:30,703 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-01-28 21:20:49,342 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 21:20:52,908 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 21:20:52,908 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-01-28 21:20:52,912 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 21:27:22,312 (trainer:737) INFO: 13epoch:train:11201-11300batch: iter_time=3.499, forward_time=0.412, loss_ctc=64.301, loss_att=60.724, acc=0.697, loss=61.797, backward_time=0.428, grad_norm=41.112, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.801e-04, train_time=5.064 +[gpub010:0/16] 2024-01-28 21:29:43,493 (trainer:737) INFO: 13epoch:train:11301-11400batch: iter_time=8.157e-05, forward_time=0.288, loss_ctc=48.620, loss_att=56.351, acc=0.704, loss=54.032, backward_time=0.400, grad_norm=33.973, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.090, optim0_lr0=2.800e-04, train_time=1.412 +[gpub010:0/16] 2024-01-28 21:32:14,036 (trainer:737) INFO: 13epoch:train:11401-11500batch: iter_time=8.073e-05, forward_time=0.289, loss_ctc=49.510, loss_att=51.508, acc=0.721, loss=50.908, backward_time=0.400, grad_norm=28.792, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.090, optim0_lr0=2.799e-04, train_time=1.505 +[gpub010:0/16] 2024-01-28 21:35:07,326 (trainer:737) 
INFO: 13epoch:train:11501-11600batch: iter_time=8.277e-05, forward_time=0.376, loss_ctc=73.333, loss_att=65.240, acc=0.729, loss=67.668, backward_time=0.452, grad_norm=41.372, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.798e-04, train_time=1.733 +[gpub010:0/16] 2024-01-28 21:37:46,412 (trainer:737) INFO: 13epoch:train:11601-11700batch: iter_time=8.148e-05, forward_time=0.293, loss_ctc=61.847, loss_att=60.008, acc=0.703, loss=60.560, backward_time=0.404, grad_norm=33.807, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.798e-04, train_time=1.591 +[gpub010:0/16] 2024-01-28 21:40:20,496 (trainer:737) INFO: 13epoch:train:11701-11800batch: iter_time=8.618e-05, forward_time=0.383, loss_ctc=52.894, loss_att=44.639, acc=0.739, loss=47.115, backward_time=0.459, grad_norm=31.741, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.797e-04, train_time=1.540 +[gpub010:0/16] 2024-01-28 21:42:45,397 (trainer:737) INFO: 13epoch:train:11801-11900batch: iter_time=8.312e-05, forward_time=0.286, loss_ctc=47.160, loss_att=40.264, acc=0.742, loss=42.333, backward_time=0.397, grad_norm=28.750, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.796e-04, train_time=1.449 +[gpub010:0/16] 2024-01-28 21:45:36,101 (trainer:737) INFO: 13epoch:train:11901-12000batch: iter_time=8.175e-05, forward_time=0.294, loss_ctc=57.571, loss_att=59.916, acc=0.741, loss=59.212, backward_time=0.406, grad_norm=29.254, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=2.795e-04, train_time=1.707 +[gpub010:0/16] 2024-01-28 21:48:10,470 (trainer:737) INFO: 13epoch:train:12001-12100batch: iter_time=0.003, forward_time=0.393, loss_ctc=44.022, loss_att=49.269, acc=0.742, loss=47.695, backward_time=0.446, grad_norm=25.166, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.795e-04, train_time=1.543 +[gpub010:0/16] 2024-01-28 21:50:52,795 (trainer:737) INFO: 
13epoch:train:12101-12200batch: iter_time=8.311e-05, forward_time=0.290, loss_ctc=61.974, loss_att=49.896, acc=0.744, loss=53.519, backward_time=0.402, grad_norm=32.852, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.794e-04, train_time=1.623 +[gpub010:0/16] 2024-01-28 21:53:33,545 (trainer:737) INFO: 13epoch:train:12201-12300batch: iter_time=8.850e-05, forward_time=0.290, loss_ctc=53.569, loss_att=49.727, acc=0.718, loss=50.879, backward_time=0.400, grad_norm=32.733, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.793e-04, train_time=1.608 +[gpub010:0/16] 2024-01-28 21:56:16,411 (trainer:737) INFO: 13epoch:train:12301-12400batch: iter_time=3.606e-04, forward_time=0.418, loss_ctc=54.320, loss_att=46.667, acc=0.751, loss=48.963, backward_time=0.465, grad_norm=28.693, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.793e-04, train_time=1.628 +[gpub010:0/16] 2024-01-28 21:58:43,158 (trainer:737) INFO: 13epoch:train:12401-12500batch: iter_time=8.052e-05, forward_time=0.292, loss_ctc=64.562, loss_att=63.567, acc=0.718, loss=63.866, backward_time=0.404, grad_norm=36.921, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.792e-04, train_time=1.467 +[gpub010:0/16] 2024-01-28 21:59:03,432 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-01-28 21:59:22,099 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 21:59:25,585 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 21:59:25,585 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-01-28 21:59:25,588 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 22:06:52,977 (trainer:737) INFO: 13epoch:train:12501-12600batch: iter_time=3.491, forward_time=0.349, loss_ctc=59.353, loss_att=58.168, acc=0.697, loss=58.523, backward_time=0.408, grad_norm=36.698, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.791e-04, train_time=4.898 +[gpub010:0/16] 2024-01-28 22:09:13,666 (trainer:737) INFO: 13epoch:train:12601-12700batch: iter_time=7.677e-05, forward_time=0.288, loss_ctc=46.722, loss_att=52.096, acc=0.709, loss=50.484, backward_time=0.401, grad_norm=30.559, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.790e-04, train_time=1.407 +[gpub010:0/16] 2024-01-28 22:12:11,885 (trainer:737) INFO: 13epoch:train:12701-12800batch: iter_time=7.872e-05, forward_time=0.396, loss_ctc=63.273, loss_att=56.541, acc=0.717, loss=58.561, backward_time=0.457, grad_norm=35.627, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.790e-04, train_time=1.782 +[gpub010:0/16] 2024-01-28 22:14:48,121 (trainer:737) 
INFO: 13epoch:train:12801-12900batch: iter_time=7.845e-05, forward_time=0.294, loss_ctc=61.934, loss_att=55.813, acc=0.729, loss=57.649, backward_time=0.406, grad_norm=34.446, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.789e-04, train_time=1.563 +[gpub010:0/16] 2024-01-28 22:17:27,558 (trainer:737) INFO: 13epoch:train:12901-13000batch: iter_time=7.684e-05, forward_time=0.289, loss_ctc=61.774, loss_att=54.269, acc=0.700, loss=56.520, backward_time=0.404, grad_norm=37.276, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.090, optim0_lr0=2.788e-04, train_time=1.594 +[gpub010:0/16] 2024-01-28 22:20:00,782 (trainer:737) INFO: 13epoch:train:13001-13100batch: iter_time=3.051e-04, forward_time=0.390, loss_ctc=43.209, loss_att=38.676, acc=0.733, loss=40.036, backward_time=0.450, grad_norm=27.685, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.787e-04, train_time=1.532 +[gpub010:0/16] 2024-01-28 22:22:36,454 (trainer:737) INFO: 13epoch:train:13101-13200batch: iter_time=7.705e-05, forward_time=0.291, loss_ctc=57.853, loss_att=54.844, acc=0.718, loss=55.746, backward_time=0.401, grad_norm=31.827, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.787e-04, train_time=1.557 +[gpub010:0/16] 2024-01-28 22:24:59,690 (trainer:737) INFO: 13epoch:train:13201-13300batch: iter_time=7.249e-05, forward_time=0.292, loss_ctc=48.457, loss_att=47.336, acc=0.753, loss=47.672, backward_time=0.400, grad_norm=25.895, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.786e-04, train_time=1.432 +[gpub010:0/16] 2024-01-28 22:27:55,888 (trainer:737) INFO: 13epoch:train:13301-13400batch: iter_time=9.152e-04, forward_time=0.366, loss_ctc=55.520, loss_att=54.889, acc=0.731, loss=55.078, backward_time=0.478, grad_norm=30.215, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.785e-04, train_time=1.762 +[gpub010:0/16] 2024-01-28 22:30:11,507 (trainer:737) INFO: 
13epoch:train:13401-13500batch: iter_time=8.273e-05, forward_time=0.289, loss_ctc=56.365, loss_att=48.875, acc=0.719, loss=51.122, backward_time=0.400, grad_norm=33.088, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.785e-04, train_time=1.356 +[gpub010:0/16] 2024-01-28 22:33:03,622 (trainer:737) INFO: 13epoch:train:13501-13600batch: iter_time=3.278e-04, forward_time=0.363, loss_ctc=56.265, loss_att=46.559, acc=0.735, loss=49.471, backward_time=0.449, grad_norm=32.138, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.784e-04, train_time=1.720 +[gpub010:0/16] 2024-01-28 22:35:39,518 (trainer:737) INFO: 13epoch:train:13601-13700batch: iter_time=8.175e-05, forward_time=0.291, loss_ctc=58.174, loss_att=54.491, acc=0.738, loss=55.596, backward_time=0.402, grad_norm=32.577, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.783e-04, train_time=1.560 +[gpub010:0/16] 2024-01-28 22:37:09,350 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-01-28 22:37:28,222 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 22:37:31,833 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 22:37:31,833 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-01-28 22:37:31,836 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-28 22:43:41,307 (trainer:737) INFO: 13epoch:train:13701-13800batch: iter_time=3.369, forward_time=0.351, loss_ctc=63.023, loss_att=59.748, acc=0.698, loss=60.730, backward_time=0.413, grad_norm=36.998, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.782e-04, train_time=4.818 +[gpub010:0/16] 2024-01-28 22:45:57,789 (trainer:737) INFO: 13epoch:train:13801-13900batch: iter_time=8.122e-05, forward_time=0.289, loss_ctc=48.553, loss_att=54.708, acc=0.710, loss=52.861, backward_time=0.401, grad_norm=31.054, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.782e-04, train_time=1.364 +[gpub010:0/16] 2024-01-28 22:48:44,903 (trainer:737) INFO: 13epoch:train:13901-14000batch: iter_time=7.896e-05, forward_time=0.289, loss_ctc=49.170, loss_att=50.544, acc=0.723, loss=50.132, backward_time=0.402, grad_norm=28.003, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.781e-04, train_time=1.671 +[gpub010:0/16] 2024-01-28 22:51:21,783 
(trainer:737) INFO: 13epoch:train:14001-14100batch: iter_time=1.832e-04, forward_time=0.365, loss_ctc=73.098, loss_att=64.773, acc=0.731, loss=67.270, backward_time=0.463, grad_norm=41.506, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.780e-04, train_time=1.569 +[gpub010:0/16] 2024-01-28 22:54:15,818 (trainer:737) INFO: 13epoch:train:14101-14200batch: iter_time=8.080e-05, forward_time=0.293, loss_ctc=61.247, loss_att=60.498, acc=0.701, loss=60.722, backward_time=0.407, grad_norm=34.434, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=2.780e-04, train_time=1.739 +[gpub010:0/16] 2024-01-28 22:56:49,375 (trainer:737) INFO: 13epoch:train:14201-14300batch: iter_time=8.535e-05, forward_time=0.384, loss_ctc=52.107, loss_att=44.315, acc=0.740, loss=46.652, backward_time=0.440, grad_norm=32.212, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.779e-04, train_time=1.536 +[gpub010:0/16] 2024-01-28 22:57:47,903 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-28 22:59:19,584 (trainer:737) INFO: 13epoch:train:14301-14400batch: iter_time=8.190e-05, forward_time=0.289, loss_ctc=46.658, loss_att=39.692, acc=0.745, loss=41.782, backward_time=0.397, grad_norm=27.066, clip=100.000, loss_scale=7.343e+33, optim_step_time=0.091, optim0_lr0=2.778e-04, train_time=1.503 +[gpub010:0/16] 2024-01-28 23:01:58,990 (trainer:737) INFO: 13epoch:train:14401-14500batch: iter_time=8.272e-05, forward_time=0.294, loss_ctc=58.129, loss_att=60.026, acc=0.742, loss=59.457, backward_time=0.405, grad_norm=30.574, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.777e-04, train_time=1.593 +[gpub010:0/16] 2024-01-28 23:04:25,455 (trainer:737) INFO: 13epoch:train:14501-14600batch: iter_time=2.568e-04, forward_time=0.391, loss_ctc=43.535, loss_att=48.917, acc=0.745, loss=47.302, backward_time=0.432, grad_norm=26.297, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.777e-04, train_time=1.464 +[gpub010:0/16] 2024-01-28 23:06:57,007 (trainer:737) INFO: 13epoch:train:14601-14700batch: iter_time=8.356e-05, forward_time=0.290, loss_ctc=61.690, loss_att=49.399, acc=0.747, loss=53.087, backward_time=0.401, grad_norm=32.139, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.776e-04, train_time=1.516 +[gpub010:0/16] 2024-01-28 23:09:46,784 (trainer:737) INFO: 13epoch:train:14701-14800batch: iter_time=7.049e-04, forward_time=0.405, loss_ctc=53.258, loss_att=49.566, acc=0.718, loss=50.673, backward_time=0.438, grad_norm=32.056, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.775e-04, train_time=1.698 +[gpub010:0/16] 2024-01-28 23:12:13,087 (trainer:737) INFO: 13epoch:train:14801-14900batch: iter_time=8.989e-05, forward_time=0.303, loss_ctc=53.643, loss_att=45.338, acc=0.757, loss=47.830, backward_time=0.405, grad_norm=28.302, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.775e-04, train_time=1.463 +[gpub010:0/16] 2024-01-28 
23:14:48,634 (trainer:737) INFO: 13epoch:train:14901-15000batch: iter_time=8.733e-05, forward_time=0.294, loss_ctc=63.943, loss_att=63.077, acc=0.721, loss=63.336, backward_time=0.405, grad_norm=36.541, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.774e-04, train_time=1.555 +[gpub010:0/16] 2024-01-28 23:54:33,877 (trainer:343) INFO: 13epoch results: [train] iter_time=0.276, forward_time=0.336, loss_ctc=57.285, loss_att=53.229, acc=0.722, loss=54.446, backward_time=0.422, grad_norm=32.776, clip=100.000, loss_scale=1.838e+33, optim_step_time=0.094, optim0_lr0=2.829e-04, train_time=1.954, time=8 hours, 9 minutes and 4 seconds, total_count=225000, gpu_max_cached_mem_GB=42.072, [valid] loss_ctc=45.272, cer_ctc=0.233, loss_att=44.548, acc=0.636, cer=0.355, wer=0.996, loss=44.765, time=39 minutes and 20.93 seconds, total_count=70065, gpu_max_cached_mem_GB=42.072 +[gpub010:0/16] 2024-01-28 23:54:51,648 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub010:0/16] 2024-01-28 23:54:51,682 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/8epoch.pth +[gpub010:0/16] 2024-01-28 23:54:51,683 (trainer:272) INFO: 14/45epoch started. Estimated time to finish: 1 week, 4 days and 17 hours +[gpub010:0/16] 2024-01-28 23:54:51,693 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-01-28 23:55:09,443 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-28 23:55:12,811 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-28 23:55:12,811 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-01-28 23:55:12,815 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 00:02:32,552 (trainer:737) INFO: 14epoch:train:1-100batch: iter_time=3.164, forward_time=0.354, loss_ctc=60.375, loss_att=66.442, acc=0.706, loss=64.622, backward_time=0.413, grad_norm=35.718, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.773e-04, train_time=4.608 +[gpub010:0/16] 2024-01-29 00:05:10,422 (trainer:737) INFO: 14epoch:train:101-200batch: iter_time=8.944e-05, forward_time=0.292, loss_ctc=63.167, loss_att=57.009, acc=0.700, loss=58.856, backward_time=0.406, grad_norm=35.593, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.772e-04, train_time=1.578 +[gpub010:0/16] 2024-01-29 00:07:59,768 (trainer:737) INFO: 14epoch:train:201-300batch: iter_time=2.635e-04, forward_time=0.377, loss_ctc=52.959, loss_att=56.826, acc=0.704, loss=55.666, backward_time=0.435, grad_norm=32.072, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.772e-04, train_time=1.693 +[gpub010:0/16] 2024-01-29 00:10:34,724 (trainer:737) INFO: 
14epoch:train:301-400batch: iter_time=2.620e-04, forward_time=0.294, loss_ctc=51.229, loss_att=46.100, acc=0.729, loss=47.639, backward_time=0.401, grad_norm=30.270, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.771e-04, train_time=1.550 +[gpub010:0/16] 2024-01-29 00:13:04,867 (trainer:737) INFO: 14epoch:train:401-500batch: iter_time=8.509e-05, forward_time=0.349, loss_ctc=57.432, loss_att=44.830, acc=0.722, loss=48.611, backward_time=0.423, grad_norm=34.286, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.770e-04, train_time=1.501 +[gpub010:0/16] 2024-01-29 00:15:53,365 (trainer:737) INFO: 14epoch:train:501-600batch: iter_time=8.824e-05, forward_time=0.324, loss_ctc=56.293, loss_att=52.099, acc=0.711, loss=53.357, backward_time=0.413, grad_norm=35.006, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.770e-04, train_time=1.685 +[gpub010:0/16] 2024-01-29 00:18:12,725 (trainer:737) INFO: 14epoch:train:601-700batch: iter_time=2.599e-04, forward_time=0.293, loss_ctc=52.805, loss_att=54.229, acc=0.704, loss=53.802, backward_time=0.406, grad_norm=31.371, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.769e-04, train_time=1.394 +[gpub010:0/16] 2024-01-29 00:21:04,683 (trainer:737) INFO: 14epoch:train:701-800batch: iter_time=3.561e-04, forward_time=0.389, loss_ctc=53.525, loss_att=47.530, acc=0.727, loss=49.329, backward_time=0.425, grad_norm=33.649, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.768e-04, train_time=1.719 +[gpub010:0/16] 2024-01-29 00:23:42,638 (trainer:737) INFO: 14epoch:train:801-900batch: iter_time=8.118e-05, forward_time=0.297, loss_ctc=51.713, loss_att=46.619, acc=0.736, loss=48.147, backward_time=0.406, grad_norm=31.181, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.767e-04, train_time=1.579 +[gpub010:0/16] 2024-01-29 00:26:16,653 (trainer:737) INFO: 14epoch:train:901-1000batch: iter_time=4.201e-04, 
forward_time=0.389, loss_ctc=62.399, loss_att=48.445, acc=0.747, loss=52.631, backward_time=0.447, grad_norm=41.711, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.767e-04, train_time=1.540 +[gpub010:0/16] 2024-01-29 00:29:00,415 (trainer:737) INFO: 14epoch:train:1001-1100batch: iter_time=9.015e-05, forward_time=0.289, loss_ctc=52.943, loss_att=46.822, acc=0.728, loss=48.658, backward_time=0.404, grad_norm=29.091, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.766e-04, train_time=1.637 +[gpub010:0/16] 2024-01-29 00:31:25,868 (trainer:737) INFO: 14epoch:train:1101-1200batch: iter_time=9.333e-05, forward_time=0.342, loss_ctc=55.514, loss_att=53.064, acc=0.711, loss=53.799, backward_time=0.413, grad_norm=33.431, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.765e-04, train_time=1.454 +[gpub010:0/16] 2024-01-29 00:33:14,917 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub010:0/16] 2024-01-29 00:33:33,609 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 00:33:37,105 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 00:33:37,105 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-01-29 00:33:37,109 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 00:39:48,645 (trainer:737) INFO: 
14epoch:train:1201-1300batch: iter_time=3.271, forward_time=0.349, loss_ctc=51.993, loss_att=57.205, acc=0.713, loss=55.641, backward_time=0.420, grad_norm=29.900, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.765e-04, train_time=5.028 +[gpub010:0/16] 2024-01-29 00:42:20,978 (trainer:737) INFO: 14epoch:train:1301-1400batch: iter_time=8.350e-05, forward_time=0.299, loss_ctc=62.745, loss_att=61.150, acc=0.707, loss=61.629, backward_time=0.408, grad_norm=36.202, clip=100.000, loss_scale=8.204e+33, optim_step_time=0.093, optim0_lr0=2.764e-04, train_time=1.523 +[gpub010:0/16] 2024-01-29 00:44:58,145 (trainer:737) INFO: 14epoch:train:1401-1500batch: iter_time=7.927e-05, forward_time=0.386, loss_ctc=55.769, loss_att=55.534, acc=0.698, loss=55.605, backward_time=0.443, grad_norm=31.636, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.763e-04, train_time=1.571 +[gpub010:0/16] 2024-01-29 00:47:29,097 (trainer:737) INFO: 14epoch:train:1501-1600batch: iter_time=8.031e-05, forward_time=0.293, loss_ctc=53.039, loss_att=52.522, acc=0.732, loss=52.677, backward_time=0.404, grad_norm=30.209, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.763e-04, train_time=1.509 +[gpub010:0/16] 2024-01-29 00:49:59,697 (trainer:737) INFO: 14epoch:train:1601-1700batch: iter_time=3.671e-04, forward_time=0.307, loss_ctc=50.944, loss_att=41.130, acc=0.742, loss=44.074, backward_time=0.402, grad_norm=30.556, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.762e-04, train_time=1.506 +[gpub010:0/16] 2024-01-29 00:52:28,052 (trainer:737) INFO: 14epoch:train:1701-1800batch: iter_time=8.602e-05, forward_time=0.389, loss_ctc=58.038, loss_att=48.138, acc=0.707, loss=51.108, backward_time=0.444, grad_norm=34.777, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.761e-04, train_time=1.482 +[gpub010:0/16] 2024-01-29 00:54:59,634 (trainer:737) INFO: 14epoch:train:1801-1900batch: 
iter_time=8.156e-05, forward_time=0.292, loss_ctc=56.117, loss_att=59.793, acc=0.703, loss=58.690, backward_time=0.403, grad_norm=35.480, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.760e-04, train_time=1.516 +[gpub010:0/16] 2024-01-29 00:57:34,292 (trainer:737) INFO: 14epoch:train:1901-2000batch: iter_time=9.819e-04, forward_time=0.293, loss_ctc=51.478, loss_att=48.121, acc=0.713, loss=49.128, backward_time=0.403, grad_norm=30.965, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.760e-04, train_time=1.546 +[gpub010:0/16] 2024-01-29 01:00:07,922 (trainer:737) INFO: 14epoch:train:2001-2100batch: iter_time=8.073e-05, forward_time=0.403, loss_ctc=50.377, loss_att=42.027, acc=0.750, loss=44.532, backward_time=0.442, grad_norm=27.837, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.759e-04, train_time=1.537 +[gpub010:0/16] 2024-01-29 01:02:38,283 (trainer:737) INFO: 14epoch:train:2101-2200batch: iter_time=7.962e-05, forward_time=0.291, loss_ctc=51.859, loss_att=47.262, acc=0.746, loss=48.641, backward_time=0.402, grad_norm=28.202, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.758e-04, train_time=1.502 +[gpub010:0/16] 2024-01-29 01:05:13,895 (trainer:737) INFO: 14epoch:train:2201-2300batch: iter_time=8.148e-05, forward_time=0.293, loss_ctc=53.411, loss_att=45.546, acc=0.730, loss=47.905, backward_time=0.408, grad_norm=39.066, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.758e-04, train_time=1.557 +[gpub010:0/16] 2024-01-29 01:09:44,919 (trainer:737) INFO: 14epoch:train:2301-2400batch: iter_time=8.379e-05, forward_time=0.435, loss_ctc=58.526, loss_att=47.718, acc=0.726, loss=50.961, backward_time=0.448, grad_norm=31.113, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.757e-04, train_time=2.709 +[gpub010:0/16] 2024-01-29 01:11:58,222 (trainer:737) INFO: 14epoch:train:2401-2500batch: iter_time=7.886e-05, forward_time=0.290, 
loss_ctc=51.047, loss_att=53.487, acc=0.718, loss=52.755, backward_time=0.402, grad_norm=32.065, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.756e-04, train_time=1.333 +[gpub010:0/16] 2024-01-29 01:12:18,349 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub010:0/16] 2024-01-29 01:12:36,890 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 01:12:40,423 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 01:12:40,423 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-01-29 01:12:40,426 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 01:24:19,096 (trainer:737) INFO: 14epoch:train:2501-2600batch: iter_time=3.329, forward_time=0.300, loss_ctc=58.132, loss_att=66.475, acc=0.721, loss=63.972, backward_time=0.406, grad_norm=32.428, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.756e-04, train_time=7.409 +[gpub010:0/16] 2024-01-29 01:27:08,652 (trainer:737) INFO: 14epoch:train:2601-2700batch: iter_time=8.856e-05, forward_time=0.390, loss_ctc=61.492, loss_att=58.414, acc=0.706, loss=59.337, backward_time=0.460, grad_norm=35.242, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.755e-04, train_time=1.694 +[gpub010:0/16] 2024-01-29 01:29:46,833 (trainer:737) INFO: 14epoch:train:2701-2800batch: 
iter_time=9.689e-05, forward_time=0.298, loss_ctc=51.432, loss_att=57.671, acc=0.710, loss=55.799, backward_time=0.404, grad_norm=29.704, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.754e-04, train_time=1.582 +[gpub010:0/16] 2024-01-29 01:32:24,177 (trainer:737) INFO: 14epoch:train:2801-2900batch: iter_time=9.819e-05, forward_time=0.290, loss_ctc=49.712, loss_att=46.630, acc=0.735, loss=47.555, backward_time=0.401, grad_norm=30.275, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.753e-04, train_time=1.574 +[gpub010:0/16] 2024-01-29 01:34:57,101 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 01:35:14,439 (trainer:737) INFO: 14epoch:train:2901-3000batch: iter_time=9.420e-05, forward_time=0.396, loss_ctc=54.209, loss_att=44.734, acc=0.732, loss=47.576, backward_time=0.459, grad_norm=33.024, clip=100.000, loss_scale=9.755e+33, optim_step_time=0.099, optim0_lr0=2.753e-04, train_time=1.702 +[gpub010:0/16] 2024-01-29 01:37:45,219 (trainer:737) INFO: 14epoch:train:3001-3100batch: iter_time=9.258e-05, forward_time=0.293, loss_ctc=54.370, loss_att=50.199, acc=0.728, loss=51.450, backward_time=0.404, grad_norm=30.884, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.752e-04, train_time=1.508 +[gpub010:0/16] 2024-01-29 01:40:26,782 (trainer:737) INFO: 14epoch:train:3101-3200batch: iter_time=9.808e-05, forward_time=0.291, loss_ctc=51.560, loss_att=54.905, acc=0.714, loss=53.901, backward_time=0.406, grad_norm=31.695, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.751e-04, train_time=1.615 +[gpub010:0/16] 2024-01-29 01:43:15,849 (trainer:737) INFO: 14epoch:train:3201-3300batch: iter_time=9.492e-05, forward_time=0.390, loss_ctc=52.299, loss_att=46.953, acc=0.737, loss=48.557, backward_time=0.441, grad_norm=32.154, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.751e-04, train_time=1.691 +[gpub010:0/16] 
2024-01-29 01:45:49,335 (trainer:737) INFO: 14epoch:train:3301-3400batch: iter_time=9.692e-05, forward_time=0.289, loss_ctc=50.243, loss_att=45.001, acc=0.751, loss=46.574, backward_time=0.403, grad_norm=37.667, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.750e-04, train_time=1.530 +[gpub010:0/16] 2024-01-29 01:48:20,941 (trainer:737) INFO: 14epoch:train:3401-3500batch: iter_time=0.001, forward_time=0.299, loss_ctc=59.512, loss_att=49.147, acc=0.761, loss=52.257, backward_time=0.408, grad_norm=36.290, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.749e-04, train_time=1.520 +[gpub010:0/16] 2024-01-29 01:51:10,717 (trainer:737) INFO: 14epoch:train:3501-3600batch: iter_time=8.639e-05, forward_time=0.379, loss_ctc=52.151, loss_att=47.001, acc=0.734, loss=48.546, backward_time=0.460, grad_norm=29.607, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.749e-04, train_time=1.697 +[gpub010:0/16] 2024-01-29 01:53:44,491 (trainer:737) INFO: 14epoch:train:3601-3700batch: iter_time=1.622e-04, forward_time=0.290, loss_ctc=54.217, loss_att=52.350, acc=0.721, loss=52.910, backward_time=0.403, grad_norm=32.360, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.748e-04, train_time=1.536 +[gpub010:0/16] 2024-01-29 01:55:16,354 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-01-29 01:55:35,001 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 01:55:38,461 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 01:55:38,461 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-01-29 01:55:38,511 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 02:02:07,771 (trainer:737) INFO: 14epoch:train:3701-3800batch: iter_time=3.485, forward_time=0.380, loss_ctc=50.604, loss_att=55.955, acc=0.730, loss=54.350, backward_time=0.420, grad_norm=34.732, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.747e-04, train_time=5.034 +[gpub010:0/16] 2024-01-29 02:05:13,786 (trainer:737) INFO: 14epoch:train:3801-3900batch: iter_time=0.220, forward_time=0.292, loss_ctc=61.436, loss_att=63.348, acc=0.714, loss=62.775, backward_time=0.404, grad_norm=34.600, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.747e-04, train_time=1.860 +[gpub010:0/16] 2024-01-29 02:08:01,099 (trainer:737) INFO: 14epoch:train:3901-4000batch: iter_time=0.007, forward_time=0.299, loss_ctc=54.837, loss_att=55.021, acc=0.709, loss=54.966, backward_time=0.397, grad_norm=30.507, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.746e-04, train_time=1.673 +[gpub010:0/16] 2024-01-29 02:10:20,848 (trainer:737) INFO: 
14epoch:train:4001-4100batch: iter_time=8.750e-05, forward_time=0.299, loss_ctc=52.102, loss_att=53.086, acc=0.738, loss=52.791, backward_time=0.408, grad_norm=30.320, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.745e-04, train_time=1.397 +[gpub010:0/16] 2024-01-29 02:13:02,111 (trainer:737) INFO: 14epoch:train:4101-4200batch: iter_time=8.850e-05, forward_time=0.391, loss_ctc=50.414, loss_att=40.694, acc=0.748, loss=43.610, backward_time=0.441, grad_norm=31.728, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.744e-04, train_time=1.613 +[gpub010:0/16] 2024-01-29 02:15:37,084 (trainer:737) INFO: 14epoch:train:4201-4300batch: iter_time=8.446e-05, forward_time=0.292, loss_ctc=56.705, loss_att=50.174, acc=0.711, loss=52.133, backward_time=0.407, grad_norm=32.606, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.744e-04, train_time=1.550 +[gpub010:0/16] 2024-01-29 02:18:13,908 (trainer:737) INFO: 14epoch:train:4301-4400batch: iter_time=8.864e-05, forward_time=0.342, loss_ctc=54.693, loss_att=59.062, acc=0.714, loss=57.751, backward_time=0.455, grad_norm=31.577, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.743e-04, train_time=1.568 +[gpub010:0/16] 2024-01-29 02:20:38,832 (trainer:737) INFO: 14epoch:train:4401-4500batch: iter_time=8.159e-05, forward_time=0.289, loss_ctc=50.358, loss_att=47.532, acc=0.728, loss=48.380, backward_time=0.401, grad_norm=29.415, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.742e-04, train_time=1.449 +[gpub010:0/16] 2024-01-29 02:22:58,764 (trainer:737) INFO: 14epoch:train:4501-4600batch: iter_time=8.804e-05, forward_time=0.302, loss_ctc=49.431, loss_att=41.026, acc=0.759, loss=43.547, backward_time=0.403, grad_norm=28.400, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.742e-04, train_time=1.399 +[gpub010:0/16] 2024-01-29 02:25:50,146 (trainer:737) INFO: 14epoch:train:4601-4700batch: 
iter_time=8.475e-05, forward_time=0.399, loss_ctc=50.714, loss_att=47.959, acc=0.761, loss=48.786, backward_time=0.426, grad_norm=27.352, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=2.741e-04, train_time=1.714 +[gpub010:0/16] 2024-01-29 02:28:10,347 (trainer:737) INFO: 14epoch:train:4701-4800batch: iter_time=8.647e-05, forward_time=0.288, loss_ctc=51.455, loss_att=44.774, acc=0.743, loss=46.778, backward_time=0.399, grad_norm=36.047, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.740e-04, train_time=1.402 +[gpub010:0/16] 2024-01-29 02:30:34,291 (trainer:737) INFO: 14epoch:train:4801-4900batch: iter_time=9.054e-05, forward_time=0.293, loss_ctc=57.430, loss_att=47.937, acc=0.732, loss=50.785, backward_time=0.403, grad_norm=30.321, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.740e-04, train_time=1.439 +[gpub010:0/16] 2024-01-29 02:33:13,689 (trainer:737) INFO: 14epoch:train:4901-5000batch: iter_time=2.963e-04, forward_time=0.366, loss_ctc=49.804, loss_att=52.605, acc=0.727, loss=51.764, backward_time=0.458, grad_norm=31.114, clip=100.000, loss_scale=5.815e+33, optim_step_time=0.100, optim0_lr0=2.739e-04, train_time=1.594 +[gpub010:0/16] 2024-01-29 02:33:33,901 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-01-29 02:33:52,658 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 02:33:56,542 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 02:33:56,543 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-01-29 02:33:56,546 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 02:41:35,278 (trainer:737) INFO: 14epoch:train:5001-5100batch: iter_time=3.226, forward_time=0.291, loss_ctc=56.976, loss_att=65.527, acc=0.713, loss=62.961, backward_time=0.403, grad_norm=32.257, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.738e-04, train_time=5.016 +[gpub010:0/16] 2024-01-29 02:44:10,375 (trainer:737) INFO: 14epoch:train:5101-5200batch: iter_time=8.478e-05, forward_time=0.290, loss_ctc=61.058, loss_att=56.214, acc=0.707, loss=57.668, backward_time=0.400, grad_norm=34.830, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.738e-04, train_time=1.551 +[gpub010:0/16] 2024-01-29 02:44:16,741 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-29 02:46:41,998 (trainer:737) INFO: 14epoch:train:5201-5300batch: iter_time=8.237e-05, forward_time=0.301, loss_ctc=51.174, loss_att=56.135, acc=0.712, loss=54.646, backward_time=0.403, grad_norm=31.315, clip=100.000, loss_scale=5.402e+33, optim_step_time=0.092, optim0_lr0=2.737e-04, train_time=1.516 +[gpub010:0/16] 2024-01-29 02:49:26,125 (trainer:737) INFO: 14epoch:train:5301-5400batch: iter_time=8.885e-05, forward_time=0.373, loss_ctc=49.163, loss_att=45.337, acc=0.735, loss=46.485, backward_time=0.435, grad_norm=30.797, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.736e-04, train_time=1.641 +[gpub010:0/16] 2024-01-29 02:51:52,234 (trainer:737) INFO: 14epoch:train:5401-5500batch: iter_time=8.460e-05, forward_time=0.290, loss_ctc=54.582, loss_att=43.036, acc=0.732, loss=46.500, backward_time=0.400, grad_norm=32.650, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.736e-04, train_time=1.461 +[gpub010:0/16] 2024-01-29 02:54:19,537 (trainer:737) INFO: 14epoch:train:5501-5600batch: iter_time=8.423e-05, forward_time=0.302, loss_ctc=54.110, loss_att=51.165, acc=0.717, loss=52.048, backward_time=0.404, grad_norm=32.597, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.735e-04, train_time=1.472 +[gpub010:0/16] 2024-01-29 02:57:05,115 (trainer:737) INFO: 14epoch:train:5601-5700batch: iter_time=8.376e-05, forward_time=0.365, loss_ctc=50.736, loss_att=54.102, acc=0.708, loss=53.092, backward_time=0.455, grad_norm=31.735, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.734e-04, train_time=1.657 +[gpub010:0/16] 2024-01-29 02:59:31,615 (trainer:737) INFO: 14epoch:train:5701-5800batch: iter_time=8.211e-05, forward_time=0.290, loss_ctc=51.793, loss_att=46.375, acc=0.734, loss=48.000, backward_time=0.400, grad_norm=32.612, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.734e-04, train_time=1.465 +[gpub010:0/16] 2024-01-29 03:01:54,214 
(trainer:737) INFO: 14epoch:train:5801-5900batch: iter_time=8.328e-05, forward_time=0.297, loss_ctc=49.486, loss_att=45.509, acc=0.742, loss=46.702, backward_time=0.408, grad_norm=29.796, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.733e-04, train_time=1.426 +[gpub010:0/16] 2024-01-29 03:04:38,329 (trainer:737) INFO: 14epoch:train:5901-6000batch: iter_time=8.356e-05, forward_time=0.298, loss_ctc=58.054, loss_att=47.025, acc=0.753, loss=50.334, backward_time=0.406, grad_norm=36.508, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.732e-04, train_time=1.640 +[gpub010:0/16] 2024-01-29 03:07:24,134 (trainer:737) INFO: 14epoch:train:6001-6100batch: iter_time=8.330e-05, forward_time=0.429, loss_ctc=51.213, loss_att=45.700, acc=0.736, loss=47.354, backward_time=0.427, grad_norm=30.190, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.731e-04, train_time=1.656 +[gpub010:0/16] 2024-01-29 03:09:49,105 (trainer:737) INFO: 14epoch:train:6101-6200batch: iter_time=8.276e-05, forward_time=0.290, loss_ctc=53.670, loss_att=51.498, acc=0.719, loss=52.150, backward_time=0.401, grad_norm=32.921, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.731e-04, train_time=1.452 +[gpub010:0/16] 2024-01-29 03:11:17,692 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub010:0/16] 2024-01-29 03:11:36,903 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 03:11:40,799 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 03:11:40,799 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-01-29 03:11:40,802 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 03:17:35,583 (trainer:737) INFO: 14epoch:train:6201-6300batch: iter_time=3.152, forward_time=0.362, loss_ctc=50.609, loss_att=55.752, acc=0.719, loss=54.209, backward_time=0.416, grad_norm=29.938, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.730e-04, train_time=4.665 +[gpub010:0/16] 2024-01-29 03:20:18,792 (trainer:737) INFO: 14epoch:train:6301-6400batch: iter_time=8.459e-05, forward_time=0.291, loss_ctc=61.349, loss_att=60.273, acc=0.713, loss=60.596, backward_time=0.404, grad_norm=34.308, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.729e-04, train_time=1.631 +[gpub010:0/16] 2024-01-29 03:22:55,398 (trainer:737) INFO: 14epoch:train:6401-6500batch: iter_time=8.408e-05, forward_time=0.290, loss_ctc=54.918, loss_att=54.653, acc=0.702, loss=54.732, backward_time=0.401, grad_norm=32.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.729e-04, train_time=1.566 +[gpub010:0/16] 2024-01-29 03:25:43,272 (trainer:737) INFO: 
14epoch:train:6501-6600batch: iter_time=8.251e-05, forward_time=0.362, loss_ctc=52.178, loss_att=51.992, acc=0.736, loss=52.048, backward_time=0.441, grad_norm=30.526, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.728e-04, train_time=1.679 +[gpub010:0/16] 2024-01-29 03:28:22,779 (trainer:737) INFO: 14epoch:train:6601-6700batch: iter_time=8.218e-05, forward_time=0.301, loss_ctc=49.645, loss_att=40.035, acc=0.747, loss=42.918, backward_time=0.397, grad_norm=29.395, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.727e-04, train_time=1.595 +[gpub010:0/16] 2024-01-29 03:30:40,710 (trainer:737) INFO: 14epoch:train:6701-6800batch: iter_time=8.294e-05, forward_time=0.289, loss_ctc=57.199, loss_att=47.921, acc=0.712, loss=50.704, backward_time=0.400, grad_norm=34.606, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.727e-04, train_time=1.379 +[gpub010:0/16] 2024-01-29 03:33:48,936 (trainer:737) INFO: 14epoch:train:6801-6900batch: iter_time=8.852e-05, forward_time=0.359, loss_ctc=54.174, loss_att=58.765, acc=0.707, loss=57.388, backward_time=0.444, grad_norm=33.473, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.726e-04, train_time=1.882 +[gpub010:0/16] 2024-01-29 03:36:26,766 (trainer:737) INFO: 14epoch:train:6901-7000batch: iter_time=9.035e-05, forward_time=0.288, loss_ctc=49.550, loss_att=47.652, acc=0.718, loss=48.222, backward_time=0.399, grad_norm=29.977, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.725e-04, train_time=1.579 +[gpub010:0/16] 2024-01-29 03:38:57,760 (trainer:737) INFO: 14epoch:train:7001-7100batch: iter_time=8.389e-05, forward_time=0.289, loss_ctc=49.224, loss_att=41.205, acc=0.756, loss=43.611, backward_time=0.401, grad_norm=27.497, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.725e-04, train_time=1.510 +[gpub010:0/16] 2024-01-29 03:41:25,294 (trainer:737) INFO: 14epoch:train:7101-7200batch: 
iter_time=2.210e-04, forward_time=0.315, loss_ctc=50.809, loss_att=46.858, acc=0.751, loss=48.043, backward_time=0.430, grad_norm=29.284, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.724e-04, train_time=1.475 +[gpub010:0/16] 2024-01-29 03:44:18,567 (trainer:737) INFO: 14epoch:train:7201-7300batch: iter_time=9.473e-05, forward_time=0.367, loss_ctc=51.305, loss_att=45.031, acc=0.736, loss=46.913, backward_time=0.407, grad_norm=37.910, clip=100.000, loss_scale=1.012e+34, optim_step_time=0.093, optim0_lr0=2.723e-04, train_time=1.733 +[gpub010:0/16] 2024-01-29 03:46:55,948 (trainer:737) INFO: 14epoch:train:7301-7400batch: iter_time=8.846e-05, forward_time=0.291, loss_ctc=57.499, loss_att=46.971, acc=0.730, loss=50.129, backward_time=0.402, grad_norm=30.140, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.723e-04, train_time=1.574 +[gpub010:0/16] 2024-01-29 03:49:18,220 (trainer:737) INFO: 14epoch:train:7401-7500batch: iter_time=7.783e-05, forward_time=0.310, loss_ctc=49.307, loss_att=52.661, acc=0.721, loss=51.655, backward_time=0.403, grad_norm=30.859, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.722e-04, train_time=1.423 +[gpub010:0/16] 2024-01-29 03:49:38,248 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub010:0/16] 2024-01-29 03:49:57,047 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 03:50:00,618 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 03:50:00,618 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-01-29 03:50:00,621 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 03:57:43,776 (trainer:737) INFO: 14epoch:train:7501-7600batch: iter_time=3.388, forward_time=0.392, loss_ctc=57.024, loss_att=64.866, acc=0.729, loss=62.514, backward_time=0.422, grad_norm=32.744, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.721e-04, train_time=5.055 +[gpub010:0/16] 2024-01-29 03:59:55,998 (trainer:737) INFO: 14epoch:train:7601-7700batch: iter_time=8.340e-05, forward_time=0.291, loss_ctc=60.021, loss_att=56.757, acc=0.711, loss=57.736, backward_time=0.402, grad_norm=34.048, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.721e-04, train_time=1.322 +[gpub010:0/16] 2024-01-29 04:02:44,776 (trainer:737) INFO: 14epoch:train:7701-7800batch: iter_time=8.651e-05, forward_time=0.382, loss_ctc=50.247, loss_att=56.237, acc=0.716, loss=54.440, backward_time=0.422, grad_norm=30.162, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.119, optim0_lr0=2.720e-04, train_time=1.688 +[gpub010:0/16] 2024-01-29 04:04:13,073 (trainer:668) 
WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 04:05:14,030 (trainer:737) INFO: 14epoch:train:7801-7900batch: iter_time=8.810e-05, forward_time=0.289, loss_ctc=48.131, loss_att=45.312, acc=0.743, loss=46.158, backward_time=0.400, grad_norm=42.795, clip=100.000, loss_scale=8.182e+33, optim_step_time=0.092, optim0_lr0=2.719e-04, train_time=1.492 +[gpub010:0/16] 2024-01-29 04:09:11,237 (trainer:737) INFO: 14epoch:train:7901-8000batch: iter_time=8.956e-05, forward_time=0.380, loss_ctc=54.141, loss_att=44.251, acc=0.738, loss=47.218, backward_time=0.437, grad_norm=33.282, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=2.719e-04, train_time=2.372 +[gpub010:0/16] 2024-01-29 04:11:42,059 (trainer:737) INFO: 14epoch:train:8001-8100batch: iter_time=9.537e-05, forward_time=0.290, loss_ctc=53.895, loss_att=50.117, acc=0.730, loss=51.250, backward_time=0.404, grad_norm=30.645, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.718e-04, train_time=1.508 +[gpub010:0/16] 2024-01-29 04:14:29,193 (trainer:737) INFO: 14epoch:train:8101-8200batch: iter_time=9.802e-05, forward_time=0.414, loss_ctc=50.361, loss_att=54.038, acc=0.720, loss=52.935, backward_time=0.423, grad_norm=29.775, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=2.717e-04, train_time=1.671 +[gpub010:0/16] 2024-01-29 04:16:56,962 (trainer:737) INFO: 14epoch:train:8201-8300batch: iter_time=9.563e-05, forward_time=0.290, loss_ctc=51.300, loss_att=45.719, acc=0.744, loss=47.394, backward_time=0.401, grad_norm=28.141, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.717e-04, train_time=1.478 +[gpub010:0/16] 2024-01-29 04:19:53,453 (trainer:737) INFO: 14epoch:train:8301-8400batch: iter_time=4.007e-04, forward_time=0.458, loss_ctc=49.552, loss_att=44.572, acc=0.754, loss=46.066, backward_time=0.438, grad_norm=28.907, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.716e-04, 
train_time=1.763 +[gpub010:0/16] 2024-01-29 04:22:17,792 (trainer:737) INFO: 14epoch:train:8401-8500batch: iter_time=8.591e-05, forward_time=0.291, loss_ctc=56.682, loss_att=47.941, acc=0.768, loss=50.563, backward_time=0.403, grad_norm=51.888, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.715e-04, train_time=1.445 +[gpub010:0/16] 2024-01-29 04:24:47,583 (trainer:737) INFO: 14epoch:train:8501-8600batch: iter_time=9.557e-05, forward_time=0.290, loss_ctc=50.974, loss_att=46.689, acc=0.738, loss=47.975, backward_time=0.402, grad_norm=28.945, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.715e-04, train_time=1.498 +[gpub010:0/16] 2024-01-29 04:27:24,926 (trainer:737) INFO: 14epoch:train:8601-8700batch: iter_time=4.733e-04, forward_time=0.466, loss_ctc=53.620, loss_att=51.410, acc=0.728, loss=52.073, backward_time=0.444, grad_norm=31.829, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.714e-04, train_time=1.571 +[gpub010:0/16] 2024-01-29 04:29:14,583 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub010:0/16] 2024-01-29 04:29:33,581 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 04:29:37,147 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 04:29:37,148 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-01-29 04:29:37,151 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 04:35:40,333 (trainer:737) INFO: 14epoch:train:8701-8800batch: iter_time=3.169, forward_time=0.291, loss_ctc=49.880, loss_att=57.078, acc=0.721, loss=54.918, backward_time=0.402, grad_norm=29.645, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.713e-04, train_time=4.956 +[gpub010:0/16] 2024-01-29 04:38:27,515 (trainer:737) INFO: 14epoch:train:8801-8900batch: iter_time=0.002, forward_time=0.431, loss_ctc=60.007, loss_att=60.417, acc=0.714, loss=60.294, backward_time=0.422, grad_norm=34.307, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.713e-04, train_time=1.672 +[gpub010:0/16] 2024-01-29 04:40:48,221 (trainer:737) INFO: 14epoch:train:8901-9000batch: iter_time=8.291e-05, forward_time=0.288, loss_ctc=54.565, loss_att=55.261, acc=0.702, loss=55.052, backward_time=0.399, grad_norm=31.771, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.712e-04, train_time=1.406 +[gpub010:0/16] 2024-01-29 04:43:45,593 (trainer:737) INFO: 
14epoch:train:9001-9100batch: iter_time=9.243e-05, forward_time=0.391, loss_ctc=51.732, loss_att=52.130, acc=0.736, loss=52.011, backward_time=0.461, grad_norm=29.751, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.711e-04, train_time=1.774 +[gpub010:0/16] 2024-01-29 04:45:59,729 (trainer:737) INFO: 14epoch:train:9101-9200batch: iter_time=8.918e-05, forward_time=0.301, loss_ctc=49.555, loss_att=40.676, acc=0.747, loss=43.340, backward_time=0.400, grad_norm=29.747, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.711e-04, train_time=1.341 +[gpub010:0/16] 2024-01-29 04:48:34,420 (trainer:737) INFO: 14epoch:train:9201-9300batch: iter_time=9.644e-05, forward_time=0.289, loss_ctc=56.605, loss_att=47.585, acc=0.712, loss=50.291, backward_time=0.400, grad_norm=37.832, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.710e-04, train_time=1.545 +[gpub010:0/16] 2024-01-29 04:51:14,434 (trainer:737) INFO: 14epoch:train:9301-9400batch: iter_time=8.577e-05, forward_time=0.415, loss_ctc=53.754, loss_att=58.817, acc=0.707, loss=57.298, backward_time=0.447, grad_norm=32.614, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.709e-04, train_time=1.601 +[gpub010:0/16] 2024-01-29 04:53:43,472 (trainer:737) INFO: 14epoch:train:9401-9500batch: iter_time=8.606e-05, forward_time=0.288, loss_ctc=49.711, loss_att=47.024, acc=0.724, loss=47.830, backward_time=0.399, grad_norm=29.196, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.709e-04, train_time=1.491 +[gpub010:0/16] 2024-01-29 04:56:19,993 (trainer:737) INFO: 14epoch:train:9501-9600batch: iter_time=8.565e-05, forward_time=0.289, loss_ctc=49.534, loss_att=41.093, acc=0.758, loss=43.626, backward_time=0.402, grad_norm=28.539, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.708e-04, train_time=1.564 +[gpub010:0/16] 2024-01-29 04:58:52,943 (trainer:737) INFO: 14epoch:train:9601-9700batch: 
iter_time=8.629e-05, forward_time=0.372, loss_ctc=50.350, loss_att=46.919, acc=0.750, loss=47.948, backward_time=0.451, grad_norm=30.052, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=2.707e-04, train_time=1.530 +[gpub010:0/16] 2024-01-29 05:01:15,083 (trainer:737) INFO: 14epoch:train:9701-9800batch: iter_time=8.753e-05, forward_time=0.288, loss_ctc=52.063, loss_att=44.716, acc=0.737, loss=46.920, backward_time=0.399, grad_norm=37.744, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.707e-04, train_time=1.421 +[gpub010:0/16] 2024-01-29 05:04:02,376 (trainer:737) INFO: 14epoch:train:9801-9900batch: iter_time=3.632e-04, forward_time=0.382, loss_ctc=57.097, loss_att=46.956, acc=0.732, loss=49.998, backward_time=0.448, grad_norm=30.198, clip=100.000, loss_scale=7.373e+33, optim_step_time=0.100, optim0_lr0=2.706e-04, train_time=1.671 +[gpub010:0/16] 2024-01-29 05:06:39,288 (trainer:737) INFO: 14epoch:train:9901-10000batch: iter_time=7.882e-05, forward_time=0.301, loss_ctc=49.125, loss_att=52.858, acc=0.721, loss=51.738, backward_time=0.403, grad_norm=29.729, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.705e-04, train_time=1.570 +[gpub010:0/16] 2024-01-29 05:06:59,316 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub010:0/16] 2024-01-29 05:07:18,147 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 05:07:21,720 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 05:07:21,720 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-01-29 05:07:21,723 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 05:14:21,918 (trainer:737) INFO: 14epoch:train:10001-10100batch: iter_time=3.109, forward_time=0.293, loss_ctc=56.322, loss_att=64.534, acc=0.729, loss=62.070, backward_time=0.408, grad_norm=31.678, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.705e-04, train_time=4.626 +[gpub010:0/16] 2024-01-29 05:17:23,784 (trainer:737) INFO: 14epoch:train:10101-10200batch: iter_time=8.352e-05, forward_time=0.449, loss_ctc=60.279, loss_att=56.899, acc=0.714, loss=57.913, backward_time=0.435, grad_norm=33.861, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.704e-04, train_time=1.817 +[gpub010:0/16] 2024-01-29 05:19:38,900 (trainer:737) INFO: 14epoch:train:10201-10300batch: iter_time=8.122e-05, forward_time=0.291, loss_ctc=49.510, loss_att=56.098, acc=0.716, loss=54.122, backward_time=0.403, grad_norm=28.531, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.703e-04, train_time=1.352 +[gpub010:0/16] 2024-01-29 05:22:27,279 (trainer:737) 
INFO: 14epoch:train:10301-10400batch: iter_time=8.345e-05, forward_time=0.400, loss_ctc=47.876, loss_att=45.389, acc=0.742, loss=46.135, backward_time=0.425, grad_norm=28.860, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=2.703e-04, train_time=1.682 +[gpub010:0/16] 2024-01-29 05:25:11,206 (trainer:737) INFO: 14epoch:train:10401-10500batch: iter_time=8.624e-05, forward_time=0.290, loss_ctc=53.590, loss_att=44.387, acc=0.737, loss=47.148, backward_time=0.401, grad_norm=32.635, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.702e-04, train_time=1.640 +[gpub010:0/16] 2024-01-29 05:27:40,469 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 05:27:44,430 (trainer:737) INFO: 14epoch:train:10501-10600batch: iter_time=8.242e-05, forward_time=0.375, loss_ctc=52.778, loss_att=49.019, acc=0.735, loss=50.147, backward_time=0.422, grad_norm=30.223, clip=100.000, loss_scale=1.023e+34, optim_step_time=0.106, optim0_lr0=2.701e-04, train_time=1.532 +[gpub010:0/16] 2024-01-29 05:30:23,024 (trainer:737) INFO: 14epoch:train:10601-10700batch: iter_time=8.590e-05, forward_time=0.290, loss_ctc=49.295, loss_att=54.228, acc=0.719, loss=52.748, backward_time=0.402, grad_norm=30.100, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.701e-04, train_time=1.584 +[gpub010:0/16] 2024-01-29 05:33:03,264 (trainer:737) INFO: 14epoch:train:10701-10800batch: iter_time=8.456e-05, forward_time=0.400, loss_ctc=51.116, loss_att=46.324, acc=0.743, loss=47.762, backward_time=0.435, grad_norm=29.246, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.700e-04, train_time=1.604 +[gpub010:0/16] 2024-01-29 05:33:13,393 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-29 05:35:46,104 (trainer:737) INFO: 14epoch:train:10801-10900batch: iter_time=8.410e-05, forward_time=0.291, loss_ctc=48.958, loss_att=44.801, acc=0.755, loss=46.048, backward_time=0.402, grad_norm=28.872, clip=100.000, loss_scale=2.675e+33, optim_step_time=0.093, optim0_lr0=2.699e-04, train_time=1.627 +[gpub010:0/16] 2024-01-29 05:38:35,628 (trainer:737) INFO: 14epoch:train:10901-11000batch: iter_time=8.250e-05, forward_time=0.409, loss_ctc=57.000, loss_att=48.096, acc=0.767, loss=50.767, backward_time=0.475, grad_norm=36.801, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.699e-04, train_time=1.696 +[gpub010:0/16] 2024-01-29 05:41:08,076 (trainer:737) INFO: 14epoch:train:11001-11100batch: iter_time=8.553e-05, forward_time=0.293, loss_ctc=50.377, loss_att=45.788, acc=0.741, loss=47.164, backward_time=0.400, grad_norm=29.045, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.698e-04, train_time=1.524 +[gpub010:0/16] 2024-01-29 05:43:44,700 (trainer:737) INFO: 14epoch:train:11101-11200batch: iter_time=8.142e-05, forward_time=0.329, loss_ctc=52.836, loss_att=51.245, acc=0.728, loss=51.722, backward_time=0.405, grad_norm=31.756, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.697e-04, train_time=1.566 +[gpub010:0/16] 2024-01-29 05:45:27,963 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-01-29 05:45:46,973 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 05:45:50,616 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 05:45:50,616 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-01-29 05:45:50,619 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 05:51:58,905 (trainer:737) INFO: 14epoch:train:11201-11300batch: iter_time=3.232, forward_time=0.367, loss_ctc=49.172, loss_att=54.566, acc=0.735, loss=52.948, backward_time=0.448, grad_norm=28.284, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.697e-04, train_time=4.942 +[gpub010:0/16] 2024-01-29 05:54:42,203 (trainer:737) INFO: 14epoch:train:11301-11400batch: iter_time=8.131e-05, forward_time=0.292, loss_ctc=60.110, loss_att=61.616, acc=0.720, loss=61.164, backward_time=0.405, grad_norm=34.308, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.696e-04, train_time=1.633 +[gpub010:0/16] 2024-01-29 05:57:02,959 (trainer:737) INFO: 14epoch:train:11401-11500batch: iter_time=8.342e-05, forward_time=0.309, loss_ctc=54.185, loss_att=54.776, acc=0.714, loss=54.599, backward_time=0.422, grad_norm=31.954, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.696e-04, train_time=1.407 +[gpub010:0/16] 2024-01-29 05:59:59,813 (trainer:737) 
INFO: 14epoch:train:11501-11600batch: iter_time=9.015e-05, forward_time=0.385, loss_ctc=51.184, loss_att=51.753, acc=0.745, loss=51.582, backward_time=0.437, grad_norm=29.317, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.695e-04, train_time=1.769 +[gpub010:0/16] 2024-01-29 06:02:28,710 (trainer:737) INFO: 14epoch:train:11601-11700batch: iter_time=8.661e-05, forward_time=0.287, loss_ctc=48.797, loss_att=40.187, acc=0.751, loss=42.770, backward_time=0.399, grad_norm=29.799, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.694e-04, train_time=1.489 +[gpub010:0/16] 2024-01-29 06:04:47,789 (trainer:737) INFO: 14epoch:train:11701-11800batch: iter_time=9.877e-05, forward_time=0.291, loss_ctc=56.723, loss_att=49.135, acc=0.719, loss=51.411, backward_time=0.407, grad_norm=34.604, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.694e-04, train_time=1.391 +[gpub010:0/16] 2024-01-29 06:08:00,614 (trainer:737) INFO: 14epoch:train:11801-11900batch: iter_time=9.885e-05, forward_time=0.400, loss_ctc=52.980, loss_att=58.314, acc=0.718, loss=56.714, backward_time=0.444, grad_norm=31.946, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.693e-04, train_time=1.928 +[gpub010:0/16] 2024-01-29 06:10:12,373 (trainer:737) INFO: 14epoch:train:11901-12000batch: iter_time=9.282e-05, forward_time=0.289, loss_ctc=48.764, loss_att=46.286, acc=0.737, loss=47.029, backward_time=0.400, grad_norm=29.437, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.692e-04, train_time=1.318 +[gpub010:0/16] 2024-01-29 06:12:33,375 (trainer:737) INFO: 14epoch:train:12001-12100batch: iter_time=9.968e-05, forward_time=0.290, loss_ctc=49.405, loss_att=40.598, acc=0.764, loss=43.240, backward_time=0.403, grad_norm=27.912, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.692e-04, train_time=1.410 +[gpub010:0/16] 2024-01-29 06:15:44,980 (trainer:737) INFO: 
14epoch:train:12101-12200batch: iter_time=4.902e-04, forward_time=0.406, loss_ctc=49.918, loss_att=47.450, acc=0.764, loss=48.190, backward_time=0.447, grad_norm=28.002, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.691e-04, train_time=1.916 +[gpub010:0/16] 2024-01-29 06:17:58,655 (trainer:737) INFO: 14epoch:train:12201-12300batch: iter_time=9.560e-05, forward_time=0.289, loss_ctc=50.308, loss_att=44.123, acc=0.747, loss=45.979, backward_time=0.400, grad_norm=35.924, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.690e-04, train_time=1.336 +[gpub010:0/16] 2024-01-29 06:20:44,827 (trainer:737) INFO: 14epoch:train:12301-12400batch: iter_time=9.077e-05, forward_time=0.292, loss_ctc=56.404, loss_att=47.053, acc=0.737, loss=49.858, backward_time=0.404, grad_norm=32.678, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.690e-04, train_time=1.662 +[gpub010:0/16] 2024-01-29 06:23:36,020 (trainer:737) INFO: 14epoch:train:12401-12500batch: iter_time=3.089e-04, forward_time=0.437, loss_ctc=48.537, loss_att=52.077, acc=0.731, loss=51.015, backward_time=0.443, grad_norm=30.068, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.689e-04, train_time=1.711 +[gpub010:0/16] 2024-01-29 06:23:56,172 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-01-29 06:24:15,174 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 06:24:18,826 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 06:24:18,826 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-01-29 06:24:18,829 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 06:31:27,390 (trainer:737) INFO: 14epoch:train:12501-12600batch: iter_time=3.099, forward_time=0.293, loss_ctc=56.704, loss_att=61.339, acc=0.735, loss=59.948, backward_time=0.406, grad_norm=30.810, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.688e-04, train_time=4.714 +[gpub010:0/16] 2024-01-29 06:34:16,654 (trainer:737) INFO: 14epoch:train:12601-12700batch: iter_time=9.458e-05, forward_time=0.364, loss_ctc=59.414, loss_att=56.518, acc=0.714, loss=57.387, backward_time=0.436, grad_norm=34.854, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.688e-04, train_time=1.693 +[gpub010:0/16] 2024-01-29 06:36:33,726 (trainer:737) INFO: 14epoch:train:12701-12800batch: iter_time=9.024e-05, forward_time=0.292, loss_ctc=49.450, loss_att=54.924, acc=0.720, loss=53.282, backward_time=0.404, grad_norm=28.857, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.687e-04, train_time=1.371 +[gpub010:0/16] 2024-01-29 06:39:21,598 (trainer:737) 
INFO: 14epoch:train:12801-12900batch: iter_time=9.276e-05, forward_time=0.294, loss_ctc=47.725, loss_att=44.499, acc=0.744, loss=45.467, backward_time=0.401, grad_norm=28.520, clip=100.000, loss_scale=5.088e+33, optim_step_time=0.092, optim0_lr0=2.686e-04, train_time=1.678 +[gpub010:0/16] 2024-01-29 06:42:15,716 (trainer:737) INFO: 14epoch:train:12901-13000batch: iter_time=9.989e-05, forward_time=0.343, loss_ctc=53.570, loss_att=43.526, acc=0.741, loss=46.539, backward_time=0.444, grad_norm=33.581, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.686e-04, train_time=1.742 +[gpub010:0/16] 2024-01-29 06:44:49,247 (trainer:737) INFO: 14epoch:train:13001-13100batch: iter_time=9.745e-05, forward_time=0.314, loss_ctc=52.621, loss_att=49.058, acc=0.733, loss=50.127, backward_time=0.409, grad_norm=32.058, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.685e-04, train_time=1.534 +[gpub010:0/16] 2024-01-29 06:47:22,063 (trainer:737) INFO: 14epoch:train:13101-13200batch: iter_time=9.468e-05, forward_time=0.290, loss_ctc=49.792, loss_att=53.620, acc=0.725, loss=52.471, backward_time=0.402, grad_norm=31.003, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.684e-04, train_time=1.529 +[gpub010:0/16] 2024-01-29 06:49:59,518 (trainer:737) INFO: 14epoch:train:13201-13300batch: iter_time=5.173e-04, forward_time=0.378, loss_ctc=51.021, loss_att=45.581, acc=0.749, loss=47.213, backward_time=0.432, grad_norm=27.874, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.684e-04, train_time=1.574 +[gpub010:0/16] 2024-01-29 06:52:53,494 (trainer:737) INFO: 14epoch:train:13301-13400batch: iter_time=9.723e-05, forward_time=0.324, loss_ctc=48.966, loss_att=44.041, acc=0.756, loss=45.518, backward_time=0.411, grad_norm=29.305, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.683e-04, train_time=1.740 +[gpub010:0/16] 2024-01-29 06:55:10,497 (trainer:737) INFO: 
14epoch:train:13401-13500batch: iter_time=1.022e-04, forward_time=0.293, loss_ctc=55.049, loss_att=46.999, acc=0.770, loss=49.414, backward_time=0.405, grad_norm=36.876, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.683e-04, train_time=1.369 +[gpub010:0/16] 2024-01-29 06:58:29,593 (trainer:737) INFO: 14epoch:train:13501-13600batch: iter_time=3.288e-04, forward_time=0.400, loss_ctc=50.686, loss_att=46.111, acc=0.740, loss=47.484, backward_time=0.446, grad_norm=29.130, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.682e-04, train_time=1.991 +[gpub010:0/16] 2024-01-29 07:00:47,221 (trainer:737) INFO: 14epoch:train:13601-13700batch: iter_time=9.792e-05, forward_time=0.292, loss_ctc=52.361, loss_att=50.612, acc=0.731, loss=51.137, backward_time=0.404, grad_norm=31.349, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.681e-04, train_time=1.376 +[gpub010:0/16] 2024-01-29 07:02:32,698 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-01-29 07:02:52,241 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 07:02:56,165 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 07:02:56,165 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-01-29 07:02:56,169 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 07:10:03,034 (trainer:737) INFO: 14epoch:train:13701-13800batch: iter_time=3.853, forward_time=0.295, loss_ctc=48.840, loss_att=56.923, acc=0.723, loss=54.498, backward_time=0.404, grad_norm=29.607, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.681e-04, train_time=5.558 +[gpub010:0/16] 2024-01-29 07:13:07,712 (trainer:737) INFO: 14epoch:train:13801-13900batch: iter_time=8.021e-05, forward_time=0.358, loss_ctc=59.683, loss_att=61.435, acc=0.714, loss=60.909, backward_time=0.470, grad_norm=43.139, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.680e-04, train_time=1.847 +[gpub010:0/16] 2024-01-29 07:15:22,340 (trainer:737) INFO: 14epoch:train:13901-14000batch: iter_time=8.105e-05, forward_time=0.307, loss_ctc=53.509, loss_att=55.524, acc=0.704, loss=54.919, backward_time=0.403, grad_norm=31.465, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.679e-04, train_time=1.346 +[gpub010:0/16] 2024-01-29 07:18:27,568 (trainer:737) 
INFO: 14epoch:train:14001-14100batch: iter_time=7.981e-05, forward_time=0.293, loss_ctc=50.362, loss_att=52.061, acc=0.740, loss=51.551, backward_time=0.405, grad_norm=30.586, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.679e-04, train_time=1.852 +[gpub010:0/16] 2024-01-29 07:20:56,319 (trainer:737) INFO: 14epoch:train:14101-14200batch: iter_time=8.369e-05, forward_time=0.352, loss_ctc=48.290, loss_att=39.730, acc=0.750, loss=42.298, backward_time=0.460, grad_norm=29.944, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.678e-04, train_time=1.488 +[gpub010:0/16] 2024-01-29 07:23:38,546 (trainer:737) INFO: 14epoch:train:14201-14300batch: iter_time=8.315e-05, forward_time=0.324, loss_ctc=55.389, loss_att=47.537, acc=0.714, loss=49.892, backward_time=0.409, grad_norm=32.373, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.677e-04, train_time=1.622 +[gpub010:0/16] 2024-01-29 07:26:11,659 (trainer:737) INFO: 14epoch:train:14301-14400batch: iter_time=8.210e-05, forward_time=0.291, loss_ctc=52.429, loss_att=58.556, acc=0.710, loss=56.718, backward_time=0.404, grad_norm=33.285, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.677e-04, train_time=1.531 +[gpub010:0/16] 2024-01-29 07:28:52,194 (trainer:737) INFO: 14epoch:train:14401-14500batch: iter_time=8.350e-05, forward_time=0.290, loss_ctc=48.836, loss_att=47.397, acc=0.722, loss=47.828, backward_time=0.409, grad_norm=30.034, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.676e-04, train_time=1.604 +[gpub010:0/16] 2024-01-29 07:31:37,617 (trainer:737) INFO: 14epoch:train:14501-14600batch: iter_time=8.138e-05, forward_time=0.348, loss_ctc=49.256, loss_att=41.319, acc=0.756, loss=43.700, backward_time=0.468, grad_norm=29.893, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.676e-04, train_time=1.655 +[gpub010:0/16] 2024-01-29 07:34:17,443 (trainer:737) INFO: 
14epoch:train:14601-14700batch: iter_time=8.196e-05, forward_time=0.314, loss_ctc=49.066, loss_att=46.349, acc=0.753, loss=47.164, backward_time=0.408, grad_norm=29.890, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.675e-04, train_time=1.598 +[gpub010:0/16] 2024-01-29 07:36:39,420 (trainer:737) INFO: 14epoch:train:14701-14800batch: iter_time=2.098e-04, forward_time=0.338, loss_ctc=50.595, loss_att=45.158, acc=0.738, loss=46.789, backward_time=0.426, grad_norm=39.118, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.674e-04, train_time=1.419 +[gpub010:0/16] 2024-01-29 07:36:59,283 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 07:39:24,631 (trainer:737) INFO: 14epoch:train:14801-14900batch: iter_time=8.262e-05, forward_time=0.332, loss_ctc=56.323, loss_att=46.495, acc=0.734, loss=49.444, backward_time=0.420, grad_norm=32.549, clip=100.000, loss_scale=5.664e+33, optim_step_time=0.092, optim0_lr0=2.674e-04, train_time=1.652 +[gpub010:0/16] 2024-01-29 07:41:54,084 (trainer:737) INFO: 14epoch:train:14901-15000batch: iter_time=7.957e-05, forward_time=0.290, loss_ctc=48.396, loss_att=51.919, acc=0.727, loss=50.862, backward_time=0.403, grad_norm=31.647, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.673e-04, train_time=1.495 +[gpub010:0/16] 2024-01-29 08:19:34,261 (trainer:343) INFO: 14epoch results: [train] iter_time=0.265, forward_time=0.329, loss_ctc=53.130, loss_att=50.514, acc=0.730, loss=51.299, backward_time=0.417, grad_norm=32.053, clip=100.000, loss_scale=5.975e+33, optim_step_time=0.095, optim0_lr0=2.722e-04, train_time=1.868, time=7 hours, 47 minutes and 26.16 seconds, total_count=240000, gpu_max_cached_mem_GB=42.072, [valid] loss_ctc=45.300, cer_ctc=0.228, loss_att=43.953, acc=0.646, cer=0.326, wer=0.995, loss=44.357, time=37 minutes and 16.23 seconds, total_count=74736, gpu_max_cached_mem_GB=42.072 +[gpub010:0/16] 2024-01-29 
08:19:44,183 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub010:0/16] 2024-01-29 08:19:44,210 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/9epoch.pth +[gpub010:0/16] 2024-01-29 08:19:44,210 (trainer:272) INFO: 15/45epoch started. Estimated time to finish: 1 week, 4 days and 3 hours +[gpub010:0/16] 2024-01-29 08:19:44,221 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub010:0/16] 2024-01-29 08:20:02,715 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 08:20:06,181 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 08:20:06,181 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-01-29 08:20:06,185 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 08:27:04,509 (trainer:737) INFO: 15epoch:train:1-100batch: iter_time=2.789, forward_time=0.335, loss_ctc=52.082, loss_att=53.177, acc=0.715, loss=52.848, backward_time=0.410, grad_norm=31.885, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.672e-04, train_time=4.403 +[gpub010:0/16] 2024-01-29 08:29:22,578 (trainer:737) INFO: 15epoch:train:101-200batch: iter_time=8.335e-05, forward_time=0.302, loss_ctc=53.280, loss_att=43.521, acc=0.730, loss=46.448, 
backward_time=0.400, grad_norm=36.207, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.672e-04, train_time=1.380 +[gpub010:0/16] 2024-01-29 08:32:20,052 (trainer:737) INFO: 15epoch:train:201-300batch: iter_time=1.353e-04, forward_time=0.321, loss_ctc=54.009, loss_att=54.277, acc=0.718, loss=54.197, backward_time=0.421, grad_norm=32.916, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.671e-04, train_time=1.774 +[gpub010:0/16] 2024-01-29 08:34:56,376 (trainer:737) INFO: 15epoch:train:301-400batch: iter_time=9.242e-05, forward_time=0.324, loss_ctc=57.442, loss_att=54.865, acc=0.701, loss=55.638, backward_time=0.409, grad_norm=40.140, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.670e-04, train_time=1.563 +[gpub010:0/16] 2024-01-29 08:37:34,976 (trainer:737) INFO: 15epoch:train:401-500batch: iter_time=8.472e-05, forward_time=0.294, loss_ctc=61.243, loss_att=58.725, acc=0.717, loss=59.480, backward_time=0.413, grad_norm=33.859, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.670e-04, train_time=1.585 +[gpub010:0/16] 2024-01-29 08:40:13,784 (trainer:737) INFO: 15epoch:train:501-600batch: iter_time=8.783e-05, forward_time=0.376, loss_ctc=53.714, loss_att=49.089, acc=0.735, loss=50.476, backward_time=0.416, grad_norm=31.766, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.669e-04, train_time=1.589 +[gpub010:0/16] 2024-01-29 08:43:03,431 (trainer:737) INFO: 15epoch:train:601-700batch: iter_time=9.257e-05, forward_time=0.315, loss_ctc=62.565, loss_att=64.169, acc=0.704, loss=63.688, backward_time=0.420, grad_norm=36.694, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.669e-04, train_time=1.697 +[gpub010:0/16] 2024-01-29 08:45:51,714 (trainer:737) INFO: 15epoch:train:701-800batch: iter_time=8.972e-05, forward_time=0.318, loss_ctc=56.545, loss_att=50.381, acc=0.724, loss=52.230, backward_time=0.426, grad_norm=33.878, clip=100.000, 
loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.668e-04, train_time=1.682 +[gpub010:0/16] 2024-01-29 08:48:31,278 (trainer:737) INFO: 15epoch:train:801-900batch: iter_time=9.359e-05, forward_time=0.334, loss_ctc=55.734, loss_att=47.156, acc=0.765, loss=49.730, backward_time=0.423, grad_norm=30.066, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.667e-04, train_time=1.596 +[gpub010:0/16] 2024-01-29 08:51:05,583 (trainer:737) INFO: 15epoch:train:901-1000batch: iter_time=5.981e-04, forward_time=0.303, loss_ctc=56.656, loss_att=47.296, acc=0.740, loss=50.104, backward_time=0.411, grad_norm=31.814, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.667e-04, train_time=1.543 +[gpub010:0/16] 2024-01-29 08:53:15,406 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 08:53:51,452 (trainer:737) INFO: 15epoch:train:1001-1100batch: iter_time=9.123e-05, forward_time=0.347, loss_ctc=53.336, loss_att=54.683, acc=0.717, loss=54.279, backward_time=0.411, grad_norm=30.268, clip=100.000, loss_scale=4.615e+33, optim_step_time=0.099, optim0_lr0=2.666e-04, train_time=1.657 +[gpub010:0/16] 2024-01-29 08:56:19,852 (trainer:737) INFO: 15epoch:train:1101-1200batch: iter_time=8.600e-05, forward_time=0.328, loss_ctc=56.468, loss_att=58.075, acc=0.718, loss=57.593, backward_time=0.417, grad_norm=31.629, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.665e-04, train_time=1.485 +[gpub010:0/16] 2024-01-29 08:57:51,802 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub010:0/16] 2024-01-29 08:58:10,965 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 08:58:14,568 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 08:58:14,568 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-01-29 08:58:14,571 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 09:04:19,157 (trainer:737) INFO: 15epoch:train:1201-1300batch: iter_time=3.194, forward_time=0.349, loss_ctc=58.999, loss_att=59.217, acc=0.713, loss=59.152, backward_time=0.415, grad_norm=36.350, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.665e-04, train_time=4.793 +[gpub010:0/16] 2024-01-29 09:06:59,695 (trainer:737) INFO: 15epoch:train:1301-1400batch: iter_time=8.332e-05, forward_time=0.323, loss_ctc=52.082, loss_att=45.948, acc=0.712, loss=47.788, backward_time=0.403, grad_norm=34.895, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.664e-04, train_time=1.604 +[gpub010:0/16] 2024-01-29 09:09:42,637 (trainer:737) INFO: 15epoch:train:1401-1500batch: iter_time=8.423e-05, forward_time=0.304, loss_ctc=53.468, loss_att=53.913, acc=0.708, loss=53.779, backward_time=0.416, grad_norm=32.336, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.663e-04, train_time=1.630 +[gpub010:0/16] 2024-01-29 09:12:38,182 (trainer:737) INFO: 
15epoch:train:1501-1600batch: iter_time=6.375e-04, forward_time=0.312, loss_ctc=54.374, loss_att=51.211, acc=0.706, loss=52.160, backward_time=0.446, grad_norm=37.730, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.663e-04, train_time=1.755 +[gpub010:0/16] 2024-01-29 09:15:17,231 (trainer:737) INFO: 15epoch:train:1601-1700batch: iter_time=8.657e-05, forward_time=0.344, loss_ctc=54.852, loss_att=53.415, acc=0.708, loss=53.846, backward_time=0.418, grad_norm=34.517, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.662e-04, train_time=1.590 +[gpub010:0/16] 2024-01-29 09:18:03,233 (trainer:737) INFO: 15epoch:train:1701-1800batch: iter_time=8.852e-05, forward_time=0.299, loss_ctc=58.176, loss_att=53.617, acc=0.726, loss=54.985, backward_time=0.402, grad_norm=32.325, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.662e-04, train_time=1.661 +[gpub010:0/16] 2024-01-29 09:21:00,984 (trainer:737) INFO: 15epoch:train:1801-1900batch: iter_time=8.251e-05, forward_time=0.355, loss_ctc=57.694, loss_att=53.158, acc=0.718, loss=54.519, backward_time=0.417, grad_norm=34.849, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.661e-04, train_time=1.777 +[gpub010:0/16] 2024-01-29 09:23:37,859 (trainer:737) INFO: 15epoch:train:1901-2000batch: iter_time=9.149e-05, forward_time=0.318, loss_ctc=56.249, loss_att=57.624, acc=0.707, loss=57.212, backward_time=0.417, grad_norm=31.423, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.660e-04, train_time=1.568 +[gpub010:0/16] 2024-01-29 09:26:29,604 (trainer:737) INFO: 15epoch:train:2001-2100batch: iter_time=8.324e-05, forward_time=0.292, loss_ctc=56.321, loss_att=49.730, acc=0.738, loss=51.707, backward_time=0.403, grad_norm=34.157, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.660e-04, train_time=1.718 +[gpub010:0/16] 2024-01-29 09:29:13,038 (trainer:737) INFO: 15epoch:train:2101-2200batch: 
iter_time=9.285e-05, forward_time=0.358, loss_ctc=50.493, loss_att=38.757, acc=0.764, loss=42.278, backward_time=0.434, grad_norm=27.293, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=2.659e-04, train_time=1.633 +[gpub010:0/16] 2024-01-29 09:31:55,935 (trainer:737) INFO: 15epoch:train:2201-2300batch: iter_time=8.818e-05, forward_time=0.304, loss_ctc=55.874, loss_att=52.855, acc=0.717, loss=53.761, backward_time=0.416, grad_norm=33.823, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.658e-04, train_time=1.629 +[gpub010:0/16] 2024-01-29 09:34:59,052 (trainer:737) INFO: 15epoch:train:2301-2400batch: iter_time=8.961e-05, forward_time=0.300, loss_ctc=55.096, loss_att=58.112, acc=0.716, loss=57.207, backward_time=0.402, grad_norm=32.324, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.658e-04, train_time=1.832 +[gpub010:0/16] 2024-01-29 09:37:34,606 (trainer:737) INFO: 15epoch:train:2401-2500batch: iter_time=8.574e-05, forward_time=0.379, loss_ctc=62.381, loss_att=60.784, acc=0.705, loss=61.263, backward_time=0.426, grad_norm=37.478, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.657e-04, train_time=1.555 +[gpub010:0/16] 2024-01-29 09:37:54,817 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub010:0/16] 2024-01-29 09:38:13,745 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 09:38:17,326 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 09:38:17,326 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-01-29 09:38:17,329 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 09:47:08,919 (trainer:737) INFO: 15epoch:train:2501-2600batch: iter_time=4.180, forward_time=0.321, loss_ctc=50.335, loss_att=51.504, acc=0.722, loss=51.153, backward_time=0.417, grad_norm=30.874, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.657e-04, train_time=5.743 +[gpub010:0/16] 2024-01-29 09:50:11,848 (trainer:737) INFO: 15epoch:train:2601-2700batch: iter_time=8.994e-04, forward_time=0.292, loss_ctc=49.502, loss_att=41.497, acc=0.736, loss=43.899, backward_time=0.430, grad_norm=32.050, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.656e-04, train_time=1.830 +[gpub010:0/16] 2024-01-29 09:52:44,166 (trainer:737) INFO: 15epoch:train:2701-2800batch: iter_time=8.468e-05, forward_time=0.367, loss_ctc=52.364, loss_att=53.538, acc=0.724, loss=53.186, backward_time=0.432, grad_norm=31.268, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.655e-04, train_time=1.523 +[gpub010:0/16] 2024-01-29 09:55:07,223 (trainer:737) INFO: 
15epoch:train:2801-2900batch: iter_time=8.970e-05, forward_time=0.303, loss_ctc=53.689, loss_att=53.710, acc=0.708, loss=53.704, backward_time=0.408, grad_norm=39.311, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.655e-04, train_time=1.431 +[gpub010:0/16] 2024-01-29 09:57:43,584 (trainer:737) INFO: 15epoch:train:2901-3000batch: iter_time=8.747e-05, forward_time=0.300, loss_ctc=61.088, loss_att=58.047, acc=0.721, loss=58.959, backward_time=0.406, grad_norm=32.797, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.654e-04, train_time=1.563 +[gpub010:0/16] 2024-01-29 10:00:36,161 (trainer:737) INFO: 15epoch:train:3001-3100batch: iter_time=8.758e-05, forward_time=0.377, loss_ctc=52.530, loss_att=47.992, acc=0.740, loss=49.354, backward_time=0.442, grad_norm=32.674, clip=100.000, loss_scale=3.167e+33, optim_step_time=0.099, optim0_lr0=2.653e-04, train_time=1.726 +[gpub010:0/16] 2024-01-29 10:03:21,612 (trainer:737) INFO: 15epoch:train:3101-3200batch: iter_time=9.366e-05, forward_time=0.295, loss_ctc=60.394, loss_att=63.211, acc=0.708, loss=62.366, backward_time=0.407, grad_norm=36.151, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.653e-04, train_time=1.655 +[gpub010:0/16] 2024-01-29 10:05:48,899 (trainer:737) INFO: 15epoch:train:3201-3300batch: iter_time=8.762e-05, forward_time=0.338, loss_ctc=55.699, loss_att=50.120, acc=0.726, loss=51.793, backward_time=0.430, grad_norm=32.872, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.652e-04, train_time=1.473 +[gpub010:0/16] 2024-01-29 10:08:25,142 (trainer:737) INFO: 15epoch:train:3301-3400batch: iter_time=8.527e-05, forward_time=0.348, loss_ctc=54.287, loss_att=47.130, acc=0.767, loss=49.277, backward_time=0.422, grad_norm=30.975, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.652e-04, train_time=1.560 +[gpub010:0/16] 2024-01-29 10:10:19,157 (trainer:668) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub010:0/16] 2024-01-29 10:11:08,876 (trainer:737) INFO: 15epoch:train:3401-3500batch: iter_time=8.398e-05, forward_time=0.290, loss_ctc=54.671, loss_att=46.934, acc=0.741, loss=49.255, backward_time=0.402, grad_norm=30.531, clip=100.000, loss_scale=4.327e+33, optim_step_time=0.092, optim0_lr0=2.651e-04, train_time=1.639 +[gpub010:0/16] 2024-01-29 10:13:38,072 (trainer:737) INFO: 15epoch:train:3501-3600batch: iter_time=8.709e-05, forward_time=0.348, loss_ctc=51.749, loss_att=53.077, acc=0.725, loss=52.679, backward_time=0.430, grad_norm=30.219, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.650e-04, train_time=1.492 +[gpub010:0/16] 2024-01-29 10:16:17,490 (trainer:737) INFO: 15epoch:train:3601-3700batch: iter_time=8.438e-05, forward_time=0.335, loss_ctc=55.143, loss_att=57.165, acc=0.722, loss=56.558, backward_time=0.426, grad_norm=30.525, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.650e-04, train_time=1.593 +[gpub010:0/16] 2024-01-29 10:18:14,855 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-01-29 10:18:34,281 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 10:18:38,234 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 10:18:38,234 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-01-29 10:18:38,237 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 10:24:23,343 (trainer:737) INFO: 15epoch:train:3701-3800batch: iter_time=3.090, forward_time=0.339, loss_ctc=55.594, loss_att=57.696, acc=0.714, loss=57.065, backward_time=0.407, grad_norm=44.643, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.649e-04, train_time=4.859 +[gpub010:0/16] 2024-01-29 10:26:47,589 (trainer:737) INFO: 15epoch:train:3801-3900batch: iter_time=8.738e-05, forward_time=0.318, loss_ctc=51.268, loss_att=45.221, acc=0.714, loss=47.035, backward_time=0.425, grad_norm=33.140, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.648e-04, train_time=1.442 +[gpub010:0/16] 2024-01-29 10:29:13,169 (trainer:737) INFO: 15epoch:train:3901-4000batch: iter_time=8.723e-05, forward_time=0.324, loss_ctc=52.449, loss_att=53.001, acc=0.710, loss=52.835, backward_time=0.419, grad_norm=31.978, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.648e-04, train_time=1.455 +[gpub010:0/16] 2024-01-29 10:32:05,306 (trainer:737) INFO: 
15epoch:train:4001-4100batch: iter_time=9.727e-05, forward_time=0.307, loss_ctc=52.957, loss_att=50.254, acc=0.711, loss=51.065, backward_time=0.415, grad_norm=36.114, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.647e-04, train_time=1.721 +[gpub010:0/16] 2024-01-29 10:34:32,145 (trainer:737) INFO: 15epoch:train:4101-4200batch: iter_time=8.700e-05, forward_time=0.330, loss_ctc=53.447, loss_att=52.682, acc=0.712, loss=52.911, backward_time=0.417, grad_norm=33.900, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.647e-04, train_time=1.469 +[gpub010:0/16] 2024-01-29 10:37:00,034 (trainer:737) INFO: 15epoch:train:4201-4300batch: iter_time=8.275e-05, forward_time=0.307, loss_ctc=58.176, loss_att=53.832, acc=0.727, loss=55.135, backward_time=0.417, grad_norm=32.334, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.646e-04, train_time=1.478 +[gpub010:0/16] 2024-01-29 10:39:53,608 (trainer:737) INFO: 15epoch:train:4301-4400batch: iter_time=2.219e-04, forward_time=0.327, loss_ctc=56.976, loss_att=52.627, acc=0.724, loss=53.932, backward_time=0.417, grad_norm=33.035, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.645e-04, train_time=1.736 +[gpub010:0/16] 2024-01-29 10:42:20,684 (trainer:737) INFO: 15epoch:train:4401-4500batch: iter_time=8.086e-05, forward_time=0.350, loss_ctc=55.420, loss_att=56.323, acc=0.711, loss=56.052, backward_time=0.431, grad_norm=30.442, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.645e-04, train_time=1.471 +[gpub010:0/16] 2024-01-29 10:44:57,610 (trainer:737) INFO: 15epoch:train:4501-4600batch: iter_time=8.879e-05, forward_time=0.301, loss_ctc=55.517, loss_att=49.211, acc=0.740, loss=51.103, backward_time=0.402, grad_norm=32.654, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.644e-04, train_time=1.569 +[gpub010:0/16] 2024-01-29 10:47:37,592 (trainer:737) INFO: 15epoch:train:4601-4700batch: 
iter_time=4.752e-04, forward_time=0.339, loss_ctc=49.654, loss_att=38.175, acc=0.767, loss=41.619, backward_time=0.409, grad_norm=27.585, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=2.644e-04, train_time=1.600 +[gpub010:0/16] 2024-01-29 10:50:11,473 (trainer:737) INFO: 15epoch:train:4701-4800batch: iter_time=8.941e-05, forward_time=0.340, loss_ctc=54.223, loss_att=51.729, acc=0.723, loss=52.477, backward_time=0.419, grad_norm=32.753, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.643e-04, train_time=1.539 +[gpub010:0/16] 2024-01-29 10:52:50,334 (trainer:737) INFO: 15epoch:train:4801-4900batch: iter_time=2.018e-04, forward_time=0.357, loss_ctc=53.776, loss_att=57.011, acc=0.720, loss=56.040, backward_time=0.410, grad_norm=30.194, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.642e-04, train_time=1.588 +[gpub010:0/16] 2024-01-29 10:55:14,569 (trainer:737) INFO: 15epoch:train:4901-5000batch: iter_time=3.389e-04, forward_time=0.324, loss_ctc=60.514, loss_att=59.539, acc=0.708, loss=59.832, backward_time=0.408, grad_norm=38.019, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.642e-04, train_time=1.442 +[gpub010:0/16] 2024-01-29 10:55:34,597 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-01-29 10:55:53,724 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 10:55:57,305 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 10:55:57,306 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-01-29 10:55:57,309 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 11:03:28,989 (trainer:737) INFO: 15epoch:train:5001-5100batch: iter_time=3.491, forward_time=0.339, loss_ctc=49.651, loss_att=49.603, acc=0.707, loss=49.617, backward_time=0.412, grad_norm=31.071, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.641e-04, train_time=4.944 +[gpub010:0/16] 2024-01-29 11:06:08,294 (trainer:737) INFO: 15epoch:train:5101-5200batch: iter_time=9.351e-05, forward_time=0.332, loss_ctc=48.836, loss_att=40.560, acc=0.729, loss=43.043, backward_time=0.460, grad_norm=33.108, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=2.640e-04, train_time=1.592 +[gpub010:0/16] 2024-01-29 11:08:40,639 (trainer:737) INFO: 15epoch:train:5201-5300batch: iter_time=8.094e-05, forward_time=0.341, loss_ctc=52.189, loss_att=52.992, acc=0.713, loss=52.751, backward_time=0.417, grad_norm=32.329, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.640e-04, train_time=1.524 +[gpub010:0/16] 2024-01-29 11:11:22,368 (trainer:737) INFO: 
15epoch:train:5301-5400batch: iter_time=8.564e-05, forward_time=0.288, loss_ctc=52.739, loss_att=53.370, acc=0.707, loss=53.180, backward_time=0.399, grad_norm=44.088, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.639e-04, train_time=1.617 +[gpub010:0/16] 2024-01-29 11:14:04,874 (trainer:737) INFO: 15epoch:train:5401-5500batch: iter_time=8.607e-05, forward_time=0.339, loss_ctc=59.481, loss_att=57.010, acc=0.711, loss=57.751, backward_time=0.433, grad_norm=33.257, clip=100.000, loss_scale=3.453e+33, optim_step_time=0.102, optim0_lr0=2.639e-04, train_time=1.624 +[gpub010:0/16] 2024-01-29 11:16:34,546 (trainer:737) INFO: 15epoch:train:5501-5600batch: iter_time=8.439e-05, forward_time=0.335, loss_ctc=51.503, loss_att=46.746, acc=0.742, loss=48.173, backward_time=0.427, grad_norm=30.868, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.638e-04, train_time=1.497 +[gpub010:0/16] 2024-01-29 11:19:26,288 (trainer:737) INFO: 15epoch:train:5601-5700batch: iter_time=2.830e-04, forward_time=0.331, loss_ctc=59.335, loss_att=59.962, acc=0.704, loss=59.774, backward_time=0.430, grad_norm=36.486, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.637e-04, train_time=1.717 +[gpub010:0/16] 2024-01-29 11:21:56,588 (trainer:737) INFO: 15epoch:train:5701-5800batch: iter_time=8.758e-05, forward_time=0.328, loss_ctc=54.899, loss_att=48.671, acc=0.723, loss=50.539, backward_time=0.419, grad_norm=32.423, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.637e-04, train_time=1.502 +[gpub010:0/16] 2024-01-29 11:24:23,791 (trainer:737) INFO: 15epoch:train:5801-5900batch: iter_time=8.543e-05, forward_time=0.304, loss_ctc=53.752, loss_att=44.498, acc=0.770, loss=47.274, backward_time=0.410, grad_norm=28.605, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.636e-04, train_time=1.472 +[gpub010:0/16] 2024-01-29 11:27:13,243 (trainer:737) INFO: 15epoch:train:5901-6000batch: 
iter_time=8.374e-05, forward_time=0.334, loss_ctc=53.643, loss_att=46.449, acc=0.737, loss=48.607, backward_time=0.448, grad_norm=32.437, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.636e-04, train_time=1.694 +[gpub010:0/16] 2024-01-29 11:29:37,884 (trainer:737) INFO: 15epoch:train:6001-6100batch: iter_time=8.435e-05, forward_time=0.346, loss_ctc=51.206, loss_att=53.000, acc=0.718, loss=52.461, backward_time=0.418, grad_norm=30.983, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.635e-04, train_time=1.446 +[gpub010:0/16] 2024-01-29 11:32:24,634 (trainer:737) INFO: 15epoch:train:6101-6200batch: iter_time=2.307e-04, forward_time=0.333, loss_ctc=54.786, loss_att=55.994, acc=0.717, loss=55.632, backward_time=0.414, grad_norm=30.332, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.634e-04, train_time=1.668 +[gpub010:0/16] 2024-01-29 11:34:06,693 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub010:0/16] 2024-01-29 11:34:26,077 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 11:34:29,672 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 11:34:29,672 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-01-29 11:34:29,675 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 11:40:00,519 
(trainer:737) INFO: 15epoch:train:6201-6300batch: iter_time=2.942, forward_time=0.339, loss_ctc=54.789, loss_att=55.647, acc=0.718, loss=55.390, backward_time=0.411, grad_norm=34.145, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.634e-04, train_time=4.559 +[gpub010:0/16] 2024-01-29 11:42:16,184 (trainer:737) INFO: 15epoch:train:6301-6400batch: iter_time=9.575e-05, forward_time=0.288, loss_ctc=50.073, loss_att=43.869, acc=0.719, loss=45.730, backward_time=0.399, grad_norm=33.749, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.633e-04, train_time=1.357 +[gpub010:0/16] 2024-01-29 11:45:07,445 (trainer:737) INFO: 15epoch:train:6401-6500batch: iter_time=7.072e-04, forward_time=0.317, loss_ctc=51.213, loss_att=51.370, acc=0.715, loss=51.323, backward_time=0.438, grad_norm=31.424, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.633e-04, train_time=1.712 +[gpub010:0/16] 2024-01-29 11:47:44,069 (trainer:737) INFO: 15epoch:train:6501-6600batch: iter_time=8.959e-05, forward_time=0.330, loss_ctc=51.909, loss_att=50.014, acc=0.711, loss=50.582, backward_time=0.424, grad_norm=40.140, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.632e-04, train_time=1.566 +[gpub010:0/16] 2024-01-29 11:49:57,638 (trainer:737) INFO: 15epoch:train:6601-6700batch: iter_time=8.979e-05, forward_time=0.290, loss_ctc=52.659, loss_att=51.890, acc=0.712, loss=52.121, backward_time=0.402, grad_norm=32.971, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.631e-04, train_time=1.335 +[gpub010:0/16] 2024-01-29 11:52:27,277 (trainer:737) INFO: 15epoch:train:6701-6800batch: iter_time=8.952e-05, forward_time=0.320, loss_ctc=57.650, loss_att=53.232, acc=0.728, loss=54.557, backward_time=0.441, grad_norm=31.821, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.631e-04, train_time=1.496 +[gpub010:0/16] 2024-01-29 11:55:20,359 (trainer:737) INFO: 
15epoch:train:6801-6900batch: iter_time=9.056e-05, forward_time=0.348, loss_ctc=56.204, loss_att=51.746, acc=0.726, loss=53.083, backward_time=0.418, grad_norm=32.746, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.630e-04, train_time=1.730 +[gpub010:0/16] 2024-01-29 11:57:33,401 (trainer:737) INFO: 15epoch:train:6901-7000batch: iter_time=8.673e-05, forward_time=0.292, loss_ctc=54.710, loss_att=55.470, acc=0.714, loss=55.242, backward_time=0.406, grad_norm=30.685, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.630e-04, train_time=1.330 +[gpub010:0/16] 2024-01-29 12:00:02,076 (trainer:737) INFO: 15epoch:train:7001-7100batch: iter_time=8.682e-05, forward_time=0.332, loss_ctc=55.223, loss_att=48.388, acc=0.744, loss=50.439, backward_time=0.435, grad_norm=32.190, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.629e-04, train_time=1.486 +[gpub010:0/16] 2024-01-29 12:02:57,879 (trainer:737) INFO: 15epoch:train:7101-7200batch: iter_time=8.686e-05, forward_time=0.331, loss_ctc=49.133, loss_att=37.621, acc=0.769, loss=41.074, backward_time=0.412, grad_norm=28.475, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.628e-04, train_time=1.759 +[gpub010:0/16] 2024-01-29 12:05:17,645 (trainer:737) INFO: 15epoch:train:7201-7300batch: iter_time=8.419e-05, forward_time=0.322, loss_ctc=53.195, loss_att=52.416, acc=0.720, loss=52.650, backward_time=0.406, grad_norm=31.927, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.628e-04, train_time=1.397 +[gpub010:0/16] 2024-01-29 12:08:13,316 (trainer:737) INFO: 15epoch:train:7301-7400batch: iter_time=8.769e-05, forward_time=0.384, loss_ctc=53.950, loss_att=56.048, acc=0.724, loss=55.419, backward_time=0.452, grad_norm=29.764, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.627e-04, train_time=1.756 +[gpub010:0/16] 2024-01-29 12:11:02,196 (trainer:737) INFO: 15epoch:train:7401-7500batch: 
iter_time=8.526e-05, forward_time=0.292, loss_ctc=59.290, loss_att=58.921, acc=0.712, loss=59.032, backward_time=0.403, grad_norm=35.828, clip=100.000, loss_scale=6.906e+33, optim_step_time=0.092, optim0_lr0=2.626e-04, train_time=1.689 +[gpub010:0/16] 2024-01-29 12:11:22,237 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub010:0/16] 2024-01-29 12:11:41,382 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 12:11:44,959 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 12:11:44,959 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-01-29 12:11:44,963 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 12:19:35,360 (trainer:737) INFO: 15epoch:train:7501-7600batch: iter_time=3.606, forward_time=0.407, loss_ctc=49.422, loss_att=52.502, acc=0.721, loss=51.578, backward_time=0.430, grad_norm=32.020, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.626e-04, train_time=5.131 +[gpub010:0/16] 2024-01-29 12:22:02,517 (trainer:737) INFO: 15epoch:train:7601-7700batch: iter_time=8.166e-05, forward_time=0.288, loss_ctc=48.408, loss_att=41.691, acc=0.739, loss=43.706, backward_time=0.399, grad_norm=31.918, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.625e-04, train_time=1.473 +[gpub010:0/16] 2024-01-29 12:24:54,395 
(trainer:737) INFO: 15epoch:train:7701-7800batch: iter_time=8.590e-05, forward_time=0.330, loss_ctc=51.282, loss_att=53.626, acc=0.725, loss=52.923, backward_time=0.446, grad_norm=31.146, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.625e-04, train_time=1.719 +[gpub010:0/16] 2024-01-29 12:27:24,683 (trainer:737) INFO: 15epoch:train:7801-7900batch: iter_time=8.650e-05, forward_time=0.318, loss_ctc=51.106, loss_att=52.990, acc=0.711, loss=52.425, backward_time=0.430, grad_norm=38.309, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.624e-04, train_time=1.502 +[gpub010:0/16] 2024-01-29 12:29:53,805 (trainer:737) INFO: 15epoch:train:7901-8000batch: iter_time=8.440e-05, forward_time=0.313, loss_ctc=59.330, loss_att=57.276, acc=0.725, loss=57.892, backward_time=0.404, grad_norm=33.734, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.623e-04, train_time=1.492 +[gpub010:0/16] 2024-01-29 12:32:37,035 (trainer:737) INFO: 15epoch:train:8001-8100batch: iter_time=8.622e-05, forward_time=0.383, loss_ctc=50.891, loss_att=47.252, acc=0.745, loss=48.344, backward_time=0.440, grad_norm=29.970, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.623e-04, train_time=1.632 +[gpub010:0/16] 2024-01-29 12:35:30,299 (trainer:737) INFO: 15epoch:train:8101-8200batch: iter_time=8.443e-05, forward_time=0.293, loss_ctc=58.618, loss_att=62.710, acc=0.712, loss=61.482, backward_time=0.406, grad_norm=35.274, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.622e-04, train_time=1.732 +[gpub010:0/16] 2024-01-29 12:37:58,289 (trainer:737) INFO: 15epoch:train:8201-8300batch: iter_time=8.314e-05, forward_time=0.359, loss_ctc=54.506, loss_att=49.828, acc=0.731, loss=51.232, backward_time=0.417, grad_norm=32.522, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=2.622e-04, train_time=1.480 +[gpub010:0/16] 2024-01-29 12:39:48,661 (trainer:668) WARNING: The grad norm is 
nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 12:40:29,238 (trainer:737) INFO: 15epoch:train:8301-8400batch: iter_time=8.201e-05, forward_time=0.370, loss_ctc=53.388, loss_att=46.118, acc=0.770, loss=48.299, backward_time=0.421, grad_norm=29.295, clip=100.000, loss_scale=8.811e+33, optim_step_time=0.094, optim0_lr0=2.621e-04, train_time=1.510 +[gpub010:0/16] 2024-01-29 12:43:13,517 (trainer:737) INFO: 15epoch:train:8401-8500batch: iter_time=8.631e-05, forward_time=0.290, loss_ctc=52.471, loss_att=46.320, acc=0.746, loss=48.165, backward_time=0.402, grad_norm=30.939, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.620e-04, train_time=1.642 +[gpub010:0/16] 2024-01-29 12:46:05,465 (trainer:737) INFO: 15epoch:train:8501-8600batch: iter_time=6.578e-04, forward_time=0.409, loss_ctc=51.087, loss_att=52.756, acc=0.727, loss=52.255, backward_time=0.437, grad_norm=29.531, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.620e-04, train_time=1.720 +[gpub010:0/16] 2024-01-29 12:48:25,807 (trainer:737) INFO: 15epoch:train:8601-8700batch: iter_time=9.042e-05, forward_time=0.330, loss_ctc=53.698, loss_att=56.417, acc=0.727, loss=55.601, backward_time=0.409, grad_norm=31.229, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.619e-04, train_time=1.404 +[gpub010:0/16] 2024-01-29 12:50:28,479 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub010:0/16] 2024-01-29 12:50:47,822 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 12:50:51,479 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 12:50:51,479 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-01-29 12:50:51,482 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 12:57:08,037 (trainer:737) INFO: 15epoch:train:8701-8800batch: iter_time=3.371, forward_time=0.371, loss_ctc=54.111, loss_att=54.988, acc=0.736, loss=54.725, backward_time=0.413, grad_norm=33.418, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.619e-04, train_time=5.221 +[gpub010:0/16] 2024-01-29 12:59:28,733 (trainer:737) INFO: 15epoch:train:8801-8900batch: iter_time=8.122e-05, forward_time=0.289, loss_ctc=49.874, loss_att=43.697, acc=0.728, loss=45.550, backward_time=0.402, grad_norm=32.788, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.618e-04, train_time=1.408 +[gpub010:0/16] 2024-01-29 13:02:14,371 (trainer:737) INFO: 15epoch:train:8901-9000batch: iter_time=8.476e-05, forward_time=0.294, loss_ctc=51.317, loss_att=51.933, acc=0.729, loss=51.748, backward_time=0.403, grad_norm=31.517, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.617e-04, train_time=1.656 +[gpub010:0/16] 2024-01-29 13:04:45,633 (trainer:737) INFO: 
15epoch:train:9001-9100batch: iter_time=8.804e-05, forward_time=0.367, loss_ctc=51.889, loss_att=50.655, acc=0.717, loss=51.025, backward_time=0.449, grad_norm=38.551, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.617e-04, train_time=1.512 +[gpub010:0/16] 2024-01-29 13:09:22,579 (trainer:737) INFO: 15epoch:train:9101-9200batch: iter_time=0.093, forward_time=0.289, loss_ctc=52.721, loss_att=52.996, acc=0.721, loss=52.913, backward_time=0.402, grad_norm=35.140, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.616e-04, train_time=2.769 +[gpub010:0/16] 2024-01-29 13:12:18,996 (trainer:737) INFO: 15epoch:train:9201-9300batch: iter_time=8.763e-05, forward_time=0.400, loss_ctc=56.660, loss_att=53.305, acc=0.738, loss=54.311, backward_time=0.437, grad_norm=30.989, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.616e-04, train_time=1.764 +[gpub010:0/16] 2024-01-29 13:15:01,238 (trainer:737) INFO: 15epoch:train:9301-9400batch: iter_time=8.105e-05, forward_time=0.292, loss_ctc=56.220, loss_att=54.807, acc=0.725, loss=55.231, backward_time=0.404, grad_norm=32.055, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.615e-04, train_time=1.621 +[gpub010:0/16] 2024-01-29 13:17:27,473 (trainer:737) INFO: 15epoch:train:9401-9500batch: iter_time=8.556e-05, forward_time=0.293, loss_ctc=55.047, loss_att=55.845, acc=0.725, loss=55.606, backward_time=0.406, grad_norm=30.155, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.614e-04, train_time=1.463 +[gpub010:0/16] 2024-01-29 13:20:37,156 (trainer:737) INFO: 15epoch:train:9501-9600batch: iter_time=4.592e-04, forward_time=0.459, loss_ctc=54.435, loss_att=49.019, acc=0.751, loss=50.644, backward_time=0.438, grad_norm=31.088, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.614e-04, train_time=1.896 +[gpub010:0/16] 2024-01-29 13:23:07,525 (trainer:737) INFO: 15epoch:train:9601-9700batch: 
iter_time=8.049e-05, forward_time=0.291, loss_ctc=49.226, loss_att=38.675, acc=0.773, loss=41.840, backward_time=0.402, grad_norm=26.188, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.613e-04, train_time=1.504 +[gpub010:0/16] 2024-01-29 13:25:56,990 (trainer:737) INFO: 15epoch:train:9701-9800batch: iter_time=3.040e-04, forward_time=0.360, loss_ctc=52.202, loss_att=51.554, acc=0.731, loss=51.748, backward_time=0.479, grad_norm=31.616, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.613e-04, train_time=1.695 +[gpub010:0/16] 2024-01-29 13:28:50,572 (trainer:737) INFO: 15epoch:train:9801-9900batch: iter_time=8.247e-05, forward_time=0.296, loss_ctc=53.709, loss_att=57.051, acc=0.731, loss=56.048, backward_time=0.405, grad_norm=30.182, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.612e-04, train_time=1.735 +[gpub010:0/16] 2024-01-29 13:31:14,208 (trainer:737) INFO: 15epoch:train:9901-10000batch: iter_time=8.179e-05, forward_time=0.293, loss_ctc=59.427, loss_att=59.480, acc=0.719, loss=59.464, backward_time=0.406, grad_norm=35.147, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.612e-04, train_time=1.437 +[gpub010:0/16] 2024-01-29 13:31:34,396 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub010:0/16] 2024-01-29 13:31:53,544 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 13:31:57,162 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 13:31:57,162 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-01-29 13:31:57,165 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 13:40:28,693 (trainer:737) INFO: 15epoch:train:10001-10100batch: iter_time=3.986, forward_time=0.421, loss_ctc=49.490, loss_att=48.371, acc=0.729, loss=48.706, backward_time=0.423, grad_norm=30.268, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.611e-04, train_time=5.545 +[gpub010:0/16] 2024-01-29 13:43:13,052 (trainer:737) INFO: 15epoch:train:10101-10200batch: iter_time=8.139e-05, forward_time=0.291, loss_ctc=48.379, loss_att=40.382, acc=0.744, loss=42.781, backward_time=0.398, grad_norm=34.670, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.610e-04, train_time=1.643 +[gpub010:0/16] 2024-01-29 13:46:09,626 (trainer:737) INFO: 15epoch:train:10201-10300batch: iter_time=8.325e-05, forward_time=0.388, loss_ctc=51.254, loss_att=52.194, acc=0.728, loss=51.912, backward_time=0.445, grad_norm=30.988, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.610e-04, train_time=1.766 +[gpub010:0/16] 2024-01-29 13:48:21,643 (trainer:737) 
INFO: 15epoch:train:10301-10400batch: iter_time=8.620e-05, forward_time=0.290, loss_ctc=50.679, loss_att=52.805, acc=0.713, loss=52.167, backward_time=0.399, grad_norm=35.898, clip=100.000, loss_scale=6.750e+33, optim_step_time=0.092, optim0_lr0=2.609e-04, train_time=1.320 +[gpub010:0/16] 2024-01-29 13:51:06,840 (trainer:737) INFO: 15epoch:train:10401-10500batch: iter_time=8.790e-05, forward_time=0.293, loss_ctc=59.737, loss_att=56.416, acc=0.728, loss=57.412, backward_time=0.405, grad_norm=33.777, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.609e-04, train_time=1.650 +[gpub010:0/16] 2024-01-29 13:51:37,679 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 13:54:22,503 (trainer:737) INFO: 15epoch:train:10501-10600batch: iter_time=2.191e-04, forward_time=0.402, loss_ctc=50.680, loss_att=46.321, acc=0.746, loss=47.629, backward_time=0.464, grad_norm=30.798, clip=100.000, loss_scale=6.031e+33, optim_step_time=0.101, optim0_lr0=2.608e-04, train_time=1.958 +[gpub010:0/16] 2024-01-29 13:56:37,541 (trainer:737) INFO: 15epoch:train:10601-10700batch: iter_time=8.468e-05, forward_time=0.295, loss_ctc=58.172, loss_att=61.706, acc=0.714, loss=60.646, backward_time=0.408, grad_norm=34.958, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.607e-04, train_time=1.351 +[gpub010:0/16] 2024-01-29 13:59:30,969 (trainer:737) INFO: 15epoch:train:10701-10800batch: iter_time=8.979e-05, forward_time=0.389, loss_ctc=54.257, loss_att=49.328, acc=0.733, loss=50.807, backward_time=0.451, grad_norm=32.258, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.607e-04, train_time=1.734 +[gpub010:0/16] 2024-01-29 14:02:17,067 (trainer:737) INFO: 15epoch:train:10801-10900batch: iter_time=8.230e-05, forward_time=0.293, loss_ctc=52.612, loss_att=45.550, acc=0.772, loss=47.669, backward_time=0.405, grad_norm=28.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, 
optim0_lr0=2.606e-04, train_time=1.660 +[gpub010:0/16] 2024-01-29 14:04:42,039 (trainer:737) INFO: 15epoch:train:10901-11000batch: iter_time=8.531e-05, forward_time=0.292, loss_ctc=51.972, loss_att=45.854, acc=0.748, loss=47.689, backward_time=0.404, grad_norm=29.946, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.606e-04, train_time=1.450 +[gpub010:0/16] 2024-01-29 14:08:08,965 (trainer:737) INFO: 15epoch:train:11001-11100batch: iter_time=5.735e-04, forward_time=0.476, loss_ctc=50.318, loss_att=52.137, acc=0.731, loss=51.591, backward_time=0.444, grad_norm=28.915, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.116, optim0_lr0=2.605e-04, train_time=2.069 +[gpub010:0/16] 2024-01-29 14:10:21,656 (trainer:737) INFO: 15epoch:train:11101-11200batch: iter_time=8.806e-05, forward_time=0.294, loss_ctc=53.677, loss_att=55.724, acc=0.729, loss=55.110, backward_time=0.408, grad_norm=30.443, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.604e-04, train_time=1.326 +[gpub010:0/16] 2024-01-29 14:11:55,562 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-01-29 14:12:14,926 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 14:12:18,505 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 14:12:18,505 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-01-29 14:12:18,509 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 14:19:24,518 (trainer:737) INFO: 15epoch:train:11201-11300batch: iter_time=3.751, forward_time=0.403, loss_ctc=53.865, loss_att=57.807, acc=0.718, loss=56.624, backward_time=0.420, grad_norm=35.476, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.604e-04, train_time=5.429 +[gpub010:0/16] 2024-01-29 14:21:35,995 (trainer:737) INFO: 15epoch:train:11301-11400batch: iter_time=8.618e-05, forward_time=0.289, loss_ctc=49.825, loss_att=44.798, acc=0.719, loss=46.306, backward_time=0.400, grad_norm=32.754, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.603e-04, train_time=1.315 +[gpub010:0/16] 2024-01-29 14:23:49,791 (trainer:737) INFO: 15epoch:train:11401-11500batch: iter_time=7.744e-05, forward_time=0.291, loss_ctc=50.485, loss_att=52.760, acc=0.714, loss=52.078, backward_time=0.402, grad_norm=32.181, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.603e-04, train_time=1.337 +[gpub010:0/16] 2024-01-29 14:27:33,441 
(trainer:737) INFO: 15epoch:train:11501-11600batch: iter_time=9.481e-05, forward_time=0.393, loss_ctc=50.639, loss_att=49.916, acc=0.714, loss=50.133, backward_time=0.435, grad_norm=36.800, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.602e-04, train_time=2.237 +[gpub010:0/16] 2024-01-29 14:30:09,372 (trainer:737) INFO: 15epoch:train:11601-11700batch: iter_time=8.297e-05, forward_time=0.291, loss_ctc=52.271, loss_att=52.059, acc=0.717, loss=52.122, backward_time=0.401, grad_norm=33.811, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.601e-04, train_time=1.559 +[gpub010:0/16] 2024-01-29 14:33:22,042 (trainer:737) INFO: 15epoch:train:11701-11800batch: iter_time=8.445e-05, forward_time=0.436, loss_ctc=57.245, loss_att=52.933, acc=0.731, loss=54.227, backward_time=0.435, grad_norm=34.215, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.601e-04, train_time=1.926 +[gpub010:0/16] 2024-01-29 14:36:08,024 (trainer:737) INFO: 15epoch:train:11801-11900batch: iter_time=8.592e-05, forward_time=0.290, loss_ctc=55.793, loss_att=52.168, acc=0.726, loss=53.255, backward_time=0.404, grad_norm=34.009, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.600e-04, train_time=1.660 +[gpub010:0/16] 2024-01-29 14:38:52,635 (trainer:737) INFO: 15epoch:train:11901-12000batch: iter_time=8.818e-05, forward_time=0.345, loss_ctc=54.354, loss_att=55.230, acc=0.716, loss=54.967, backward_time=0.435, grad_norm=29.900, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.600e-04, train_time=1.646 +[gpub010:0/16] 2024-01-29 14:41:47,463 (trainer:737) INFO: 15epoch:train:12001-12100batch: iter_time=9.068e-05, forward_time=0.328, loss_ctc=54.204, loss_att=48.442, acc=0.744, loss=50.171, backward_time=0.420, grad_norm=32.392, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.599e-04, train_time=1.748 +[gpub010:0/16] 2024-01-29 14:44:38,133 (trainer:737) INFO: 
15epoch:train:12101-12200batch: iter_time=8.562e-05, forward_time=0.291, loss_ctc=48.349, loss_att=37.310, acc=0.772, loss=40.621, backward_time=0.400, grad_norm=26.231, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.599e-04, train_time=1.707 +[gpub010:0/16] 2024-01-29 14:47:51,125 (trainer:737) INFO: 15epoch:train:12201-12300batch: iter_time=4.173e-04, forward_time=0.376, loss_ctc=52.121, loss_att=51.184, acc=0.726, loss=51.465, backward_time=0.473, grad_norm=32.047, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.598e-04, train_time=1.929 +[gpub010:0/16] 2024-01-29 14:50:22,860 (trainer:737) INFO: 15epoch:train:12301-12400batch: iter_time=8.540e-05, forward_time=0.292, loss_ctc=52.808, loss_att=56.396, acc=0.723, loss=55.319, backward_time=0.402, grad_norm=31.254, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.597e-04, train_time=1.517 +[gpub010:0/16] 2024-01-29 14:53:13,653 (trainer:737) INFO: 15epoch:train:12401-12500batch: iter_time=9.690e-05, forward_time=0.304, loss_ctc=58.703, loss_att=58.944, acc=0.713, loss=58.872, backward_time=0.417, grad_norm=37.899, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.597e-04, train_time=1.707 +[gpub010:0/16] 2024-01-29 14:53:33,802 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-01-29 14:53:53,286 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 14:53:56,901 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 14:53:56,902 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-01-29 14:53:56,905 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 15:01:27,572 (trainer:737) INFO: 15epoch:train:12501-12600batch: iter_time=3.139, forward_time=0.449, loss_ctc=48.689, loss_att=48.809, acc=0.711, loss=48.773, backward_time=0.430, grad_norm=31.028, clip=100.000, loss_scale=9.502e+33, optim_step_time=0.097, optim0_lr0=2.596e-04, train_time=4.940 +[gpub010:0/16] 2024-01-29 15:04:23,839 (trainer:737) INFO: 15epoch:train:12601-12700batch: iter_time=8.859e-05, forward_time=0.395, loss_ctc=47.512, loss_att=40.086, acc=0.734, loss=42.314, backward_time=0.456, grad_norm=34.827, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.596e-04, train_time=1.763 +[gpub010:0/16] 2024-01-29 15:07:13,592 (trainer:737) INFO: 15epoch:train:12701-12800batch: iter_time=9.020e-05, forward_time=0.290, loss_ctc=51.129, loss_att=52.715, acc=0.715, loss=52.240, backward_time=0.401, grad_norm=31.167, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.595e-04, train_time=1.697 +[gpub010:0/16] 2024-01-29 15:10:14,084 (trainer:737) 
INFO: 15epoch:train:12801-12900batch: iter_time=9.034e-05, forward_time=0.388, loss_ctc=50.066, loss_att=52.229, acc=0.713, loss=51.580, backward_time=0.492, grad_norm=37.619, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=2.594e-04, train_time=1.805 +[gpub010:0/16] 2024-01-29 15:12:09,633 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 15:12:48,530 (trainer:737) INFO: 15epoch:train:12901-13000batch: iter_time=9.407e-05, forward_time=0.291, loss_ctc=58.887, loss_att=56.673, acc=0.715, loss=57.337, backward_time=0.402, grad_norm=32.504, clip=100.000, loss_scale=9.283e+33, optim_step_time=0.092, optim0_lr0=2.594e-04, train_time=1.544 +[gpub010:0/16] 2024-01-29 15:16:16,790 (trainer:737) INFO: 15epoch:train:13001-13100batch: iter_time=9.826e-05, forward_time=0.416, loss_ctc=50.255, loss_att=45.960, acc=0.747, loss=47.249, backward_time=0.444, grad_norm=31.062, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.121, optim0_lr0=2.593e-04, train_time=2.083 +[gpub010:0/16] 2024-01-29 15:18:51,743 (trainer:737) INFO: 15epoch:train:13101-13200batch: iter_time=9.718e-05, forward_time=0.294, loss_ctc=58.447, loss_att=59.181, acc=0.708, loss=58.961, backward_time=0.406, grad_norm=34.359, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.593e-04, train_time=1.549 +[gpub010:0/16] 2024-01-29 15:22:25,592 (trainer:737) INFO: 15epoch:train:13201-13300batch: iter_time=9.715e-05, forward_time=0.469, loss_ctc=53.663, loss_att=47.475, acc=0.730, loss=49.332, backward_time=0.444, grad_norm=31.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.592e-04, train_time=2.138 +[gpub010:0/16] 2024-01-29 15:25:49,856 (trainer:737) INFO: 15epoch:train:13301-13400batch: iter_time=1.006e-04, forward_time=0.447, loss_ctc=52.691, loss_att=43.945, acc=0.774, loss=46.569, backward_time=0.476, grad_norm=28.070, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, 
optim0_lr0=2.592e-04, train_time=2.041 +[gpub010:0/16] 2024-01-29 15:28:15,930 (trainer:737) INFO: 15epoch:train:13401-13500batch: iter_time=8.576e-05, forward_time=0.291, loss_ctc=51.237, loss_att=45.936, acc=0.742, loss=47.526, backward_time=0.403, grad_norm=32.010, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.591e-04, train_time=1.461 +[gpub010:0/16] 2024-01-29 15:31:16,682 (trainer:737) INFO: 15epoch:train:13501-13600batch: iter_time=2.008e-04, forward_time=0.372, loss_ctc=50.405, loss_att=52.119, acc=0.722, loss=51.605, backward_time=0.453, grad_norm=29.699, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.590e-04, train_time=1.807 +[gpub010:0/16] 2024-01-29 15:34:20,660 (trainer:737) INFO: 15epoch:train:13601-13700batch: iter_time=1.016e-04, forward_time=0.291, loss_ctc=53.407, loss_att=55.148, acc=0.721, loss=54.625, backward_time=0.402, grad_norm=30.748, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.590e-04, train_time=1.840 +[gpub010:0/16] 2024-01-29 15:35:46,239 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-01-29 15:36:05,859 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 15:36:09,531 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 15:36:09,531 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-01-29 15:36:09,535 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 15:43:01,852 (trainer:737) INFO: 15epoch:train:13701-13800batch: iter_time=3.531, forward_time=0.363, loss_ctc=53.925, loss_att=57.277, acc=0.730, loss=56.272, backward_time=0.417, grad_norm=36.508, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.589e-04, train_time=5.212 +[gpub010:0/16] 2024-01-29 15:45:43,100 (trainer:737) INFO: 15epoch:train:13801-13900batch: iter_time=8.605e-05, forward_time=0.287, loss_ctc=49.175, loss_att=44.422, acc=0.728, loss=45.848, backward_time=0.399, grad_norm=33.821, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.589e-04, train_time=1.612 +[gpub010:0/16] 2024-01-29 15:48:30,028 (trainer:737) INFO: 15epoch:train:13901-14000batch: iter_time=8.286e-05, forward_time=0.421, loss_ctc=51.187, loss_att=52.816, acc=0.729, loss=52.327, backward_time=0.446, grad_norm=31.168, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.588e-04, train_time=1.669 +[gpub010:0/16] 2024-01-29 15:51:21,308 (trainer:737) 
INFO: 15epoch:train:14001-14100batch: iter_time=8.446e-05, forward_time=0.289, loss_ctc=50.263, loss_att=49.312, acc=0.720, loss=49.597, backward_time=0.400, grad_norm=36.224, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.588e-04, train_time=1.712 +[gpub010:0/16] 2024-01-29 15:54:24,061 (trainer:737) INFO: 15epoch:train:14101-14200batch: iter_time=8.911e-05, forward_time=0.439, loss_ctc=51.486, loss_att=52.235, acc=0.725, loss=52.010, backward_time=0.439, grad_norm=32.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.117, optim0_lr0=2.587e-04, train_time=1.826 +[gpub010:0/16] 2024-01-29 15:56:55,778 (trainer:737) INFO: 15epoch:train:14201-14300batch: iter_time=9.120e-05, forward_time=0.292, loss_ctc=55.862, loss_att=53.308, acc=0.738, loss=54.075, backward_time=0.403, grad_norm=32.542, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.586e-04, train_time=1.518 +[gpub010:0/16] 2024-01-29 16:00:19,596 (trainer:737) INFO: 15epoch:train:14301-14400batch: iter_time=8.931e-05, forward_time=0.481, loss_ctc=55.092, loss_att=54.110, acc=0.727, loss=54.405, backward_time=0.436, grad_norm=33.855, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.586e-04, train_time=2.037 +[gpub010:0/16] 2024-01-29 16:02:55,557 (trainer:737) INFO: 15epoch:train:14401-14500batch: iter_time=9.292e-05, forward_time=0.294, loss_ctc=53.659, loss_att=54.739, acc=0.728, loss=54.415, backward_time=0.406, grad_norm=29.723, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.585e-04, train_time=1.560 +[gpub010:0/16] 2024-01-29 16:05:57,883 (trainer:737) INFO: 15epoch:train:14501-14600batch: iter_time=9.434e-05, forward_time=0.441, loss_ctc=53.374, loss_att=48.567, acc=0.753, loss=50.009, backward_time=0.431, grad_norm=32.232, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.585e-04, train_time=1.822 +[gpub010:0/16] 2024-01-29 16:09:11,179 (trainer:737) INFO: 
15epoch:train:14601-14700batch: iter_time=8.968e-05, forward_time=0.471, loss_ctc=48.752, loss_att=38.493, acc=0.775, loss=41.571, backward_time=0.430, grad_norm=27.370, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.584e-04, train_time=1.933 +[gpub010:0/16] 2024-01-29 16:11:29,211 (trainer:737) INFO: 15epoch:train:14701-14800batch: iter_time=9.207e-05, forward_time=0.291, loss_ctc=51.392, loss_att=51.216, acc=0.732, loss=51.269, backward_time=0.406, grad_norm=31.959, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.583e-04, train_time=1.379 +[gpub010:0/16] 2024-01-29 16:15:00,233 (trainer:737) INFO: 15epoch:train:14801-14900batch: iter_time=9.204e-05, forward_time=0.438, loss_ctc=52.760, loss_att=56.815, acc=0.732, loss=55.599, backward_time=0.430, grad_norm=30.846, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.116, optim0_lr0=2.583e-04, train_time=2.111 +[gpub010:0/16] 2024-01-29 16:17:44,626 (trainer:737) INFO: 15epoch:train:14901-15000batch: iter_time=8.669e-05, forward_time=0.291, loss_ctc=57.904, loss_att=58.602, acc=0.722, loss=58.392, backward_time=0.405, grad_norm=41.731, clip=100.000, loss_scale=6.283e+33, optim_step_time=0.092, optim0_lr0=2.582e-04, train_time=1.643 +[gpub010:0/16] 2024-01-29 16:57:12,750 (trainer:343) INFO: 15epoch results: [train] iter_time=0.275, forward_time=0.337, loss_ctc=53.778, loss_att=51.619, acc=0.727, loss=52.267, backward_time=0.420, grad_norm=32.798, clip=100.000, loss_scale=5.030e+33, optim_step_time=0.096, optim0_lr0=2.627e-04, train_time=1.912, time=7 hours, 58 minutes and 24.71 seconds, total_count=255000, gpu_max_cached_mem_GB=42.072, [valid] loss_ctc=41.359, cer_ctc=0.217, loss_att=44.311, acc=0.641, cer=0.329, wer=0.998, loss=43.425, time=39 minutes and 3.54 seconds, total_count=79407, gpu_max_cached_mem_GB=42.072 +[gpub010:0/16] 2024-01-29 16:57:29,466 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-01-29 16:57:29,653 
(trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/10epoch.pth +[gpub010:0/16] 2024-01-29 16:57:29,874 (trainer:272) INFO: 16/45epoch started. Estimated time to finish: 1 week, 3 days and 18 hours +[gpub010:0/16] 2024-01-29 16:57:30,411 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub010:0/16] 2024-01-29 16:57:49,060 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 16:57:52,558 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 16:57:52,558 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-01-29 16:57:52,561 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 17:10:30,162 (trainer:737) INFO: 16epoch:train:1-100batch: iter_time=3.592, forward_time=0.360, loss_ctc=49.086, loss_att=45.905, acc=0.733, loss=46.859, backward_time=0.411, grad_norm=29.922, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.582e-04, train_time=7.800 +[gpub010:0/16] 2024-01-29 17:12:44,670 (trainer:737) INFO: 16epoch:train:101-200batch: iter_time=8.275e-05, forward_time=0.291, loss_ctc=57.996, loss_att=55.841, acc=0.713, loss=56.487, backward_time=0.403, grad_norm=42.880, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.581e-04, train_time=1.345 
+[gpub010:0/16] 2024-01-29 17:15:19,735 (trainer:737) INFO: 16epoch:train:201-300batch: iter_time=2.335e-04, forward_time=0.295, loss_ctc=58.848, loss_att=55.518, acc=0.711, loss=56.517, backward_time=0.407, grad_norm=42.403, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.581e-04, train_time=1.550 +[gpub010:0/16] 2024-01-29 17:18:32,282 (trainer:737) INFO: 16epoch:train:301-400batch: iter_time=2.286e-04, forward_time=0.400, loss_ctc=48.977, loss_att=51.671, acc=0.728, loss=50.863, backward_time=0.448, grad_norm=31.162, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.113, optim0_lr0=2.580e-04, train_time=1.926 +[gpub010:0/16] 2024-01-29 17:20:49,132 (trainer:737) INFO: 16epoch:train:401-500batch: iter_time=8.374e-05, forward_time=0.293, loss_ctc=57.241, loss_att=56.687, acc=0.735, loss=56.853, backward_time=0.407, grad_norm=34.400, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.579e-04, train_time=1.368 +[gpub010:0/16] 2024-01-29 17:23:24,432 (trainer:737) INFO: 16epoch:train:501-600batch: iter_time=5.541e-04, forward_time=0.319, loss_ctc=47.604, loss_att=48.391, acc=0.737, loss=48.155, backward_time=0.413, grad_norm=29.347, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.579e-04, train_time=1.552 +[gpub010:0/16] 2024-01-29 17:26:04,081 (trainer:737) INFO: 16epoch:train:601-700batch: iter_time=8.448e-05, forward_time=0.408, loss_ctc=56.093, loss_att=54.736, acc=0.738, loss=55.143, backward_time=0.449, grad_norm=32.338, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=2.578e-04, train_time=1.597 +[gpub010:0/16] 2024-01-29 17:28:52,702 (trainer:737) INFO: 16epoch:train:701-800batch: iter_time=9.548e-05, forward_time=0.289, loss_ctc=50.803, loss_att=46.168, acc=0.730, loss=47.558, backward_time=0.401, grad_norm=33.644, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.578e-04, train_time=1.686 +[gpub010:0/16] 2024-01-29 17:31:36,105 (trainer:737) 
INFO: 16epoch:train:801-900batch: iter_time=7.407e-04, forward_time=0.372, loss_ctc=48.795, loss_att=46.036, acc=0.727, loss=46.864, backward_time=0.417, grad_norm=32.136, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.118, optim0_lr0=2.577e-04, train_time=1.633 +[gpub010:0/16] 2024-01-29 17:34:02,237 (trainer:737) INFO: 16epoch:train:901-1000batch: iter_time=8.307e-05, forward_time=0.330, loss_ctc=47.238, loss_att=45.668, acc=0.722, loss=46.139, backward_time=0.412, grad_norm=31.657, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.577e-04, train_time=1.462 +[gpub010:0/16] 2024-01-29 17:36:27,244 (trainer:737) INFO: 16epoch:train:1001-1100batch: iter_time=8.825e-05, forward_time=0.290, loss_ctc=51.191, loss_att=46.191, acc=0.744, loss=47.691, backward_time=0.405, grad_norm=29.476, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.576e-04, train_time=1.450 +[gpub010:0/16] 2024-01-29 17:39:35,055 (trainer:737) INFO: 16epoch:train:1101-1200batch: iter_time=0.002, forward_time=0.300, loss_ctc=55.971, loss_att=50.606, acc=0.733, loss=52.216, backward_time=0.413, grad_norm=33.161, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.575e-04, train_time=1.878 +[gpub010:0/16] 2024-01-29 17:41:15,293 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub010:0/16] 2024-01-29 17:41:34,218 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 17:41:37,861 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 17:41:37,862 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-01-29 17:41:37,865 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 17:48:15,206 (trainer:737) INFO: 16epoch:train:1201-1300batch: iter_time=3.620, forward_time=0.340, loss_ctc=57.363, loss_att=57.998, acc=0.723, loss=57.807, backward_time=0.477, grad_norm=41.831, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.575e-04, train_time=5.201 +[gpub010:0/16] 2024-01-29 17:50:31,653 (trainer:737) INFO: 16epoch:train:1301-1400batch: iter_time=1.317e-04, forward_time=0.294, loss_ctc=55.478, loss_att=51.447, acc=0.726, loss=52.657, backward_time=0.400, grad_norm=38.997, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.574e-04, train_time=1.364 +[gpub010:0/16] 2024-01-29 17:53:26,684 (trainer:737) INFO: 16epoch:train:1401-1500batch: iter_time=3.461e-04, forward_time=0.400, loss_ctc=50.544, loss_att=51.428, acc=0.714, loss=51.163, backward_time=0.461, grad_norm=36.767, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.574e-04, train_time=1.749 +[gpub010:0/16] 2024-01-29 17:56:05,992 (trainer:737) 
INFO: 16epoch:train:1501-1600batch: iter_time=9.148e-05, forward_time=0.293, loss_ctc=54.820, loss_att=55.069, acc=0.736, loss=54.994, backward_time=0.405, grad_norm=38.639, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.573e-04, train_time=1.593 +[gpub010:0/16] 2024-01-29 17:58:28,278 (trainer:737) INFO: 16epoch:train:1601-1700batch: iter_time=4.694e-04, forward_time=0.299, loss_ctc=52.289, loss_att=52.865, acc=0.737, loss=52.692, backward_time=0.408, grad_norm=33.902, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.573e-04, train_time=1.424 +[gpub010:0/16] 2024-01-29 18:01:19,957 (trainer:737) INFO: 16epoch:train:1701-1800batch: iter_time=8.684e-05, forward_time=0.366, loss_ctc=55.624, loss_att=59.013, acc=0.740, loss=57.996, backward_time=0.468, grad_norm=31.900, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.572e-04, train_time=1.715 +[gpub010:0/16] 2024-01-29 18:03:44,801 (trainer:737) INFO: 16epoch:train:1801-1900batch: iter_time=2.302e-04, forward_time=0.300, loss_ctc=51.542, loss_att=52.377, acc=0.742, loss=52.127, backward_time=0.408, grad_norm=31.244, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.571e-04, train_time=1.448 +[gpub010:0/16] 2024-01-29 18:06:22,172 (trainer:737) INFO: 16epoch:train:1901-2000batch: iter_time=9.707e-05, forward_time=0.300, loss_ctc=50.786, loss_att=50.192, acc=0.746, loss=50.370, backward_time=0.415, grad_norm=31.752, clip=100.000, loss_scale=1.257e+34, optim_step_time=0.092, optim0_lr0=2.571e-04, train_time=1.575 +[gpub010:0/16] 2024-01-29 18:09:06,539 (trainer:737) INFO: 16epoch:train:2001-2100batch: iter_time=9.194e-05, forward_time=0.367, loss_ctc=46.842, loss_att=42.770, acc=0.742, loss=43.991, backward_time=0.439, grad_norm=29.627, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.100, optim0_lr0=2.570e-04, train_time=1.643 +[gpub010:0/16] 2024-01-29 18:09:34,750 (trainer:668) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub010:0/16] 2024-01-29 18:11:26,279 (trainer:737) INFO: 16epoch:train:2101-2200batch: iter_time=9.950e-05, forward_time=0.299, loss_ctc=49.162, loss_att=50.034, acc=0.724, loss=49.772, backward_time=0.403, grad_norm=32.569, clip=100.000, loss_scale=1.238e+34, optim_step_time=0.092, optim0_lr0=2.570e-04, train_time=1.398 +[gpub010:0/16] 2024-01-29 18:13:57,476 (trainer:737) INFO: 16epoch:train:2201-2300batch: iter_time=9.376e-05, forward_time=0.296, loss_ctc=50.123, loss_att=45.654, acc=0.752, loss=46.995, backward_time=0.411, grad_norm=29.606, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.569e-04, train_time=1.511 +[gpub010:0/16] 2024-01-29 18:17:11,437 (trainer:737) INFO: 16epoch:train:2301-2400batch: iter_time=8.238e-05, forward_time=0.411, loss_ctc=53.430, loss_att=50.295, acc=0.743, loss=51.235, backward_time=0.458, grad_norm=33.380, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.569e-04, train_time=1.939 +[gpub010:0/16] 2024-01-29 18:19:33,722 (trainer:737) INFO: 16epoch:train:2401-2500batch: iter_time=9.199e-04, forward_time=0.298, loss_ctc=58.438, loss_att=52.310, acc=0.734, loss=54.148, backward_time=0.405, grad_norm=39.413, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.568e-04, train_time=1.423 +[gpub010:0/16] 2024-01-29 18:19:53,780 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub010:0/16] 2024-01-29 18:20:12,545 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 18:20:16,389 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 18:20:16,389 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-01-29 18:20:16,392 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 18:27:21,397 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-29 18:28:05,473 (trainer:737) INFO: 16epoch:train:2501-2600batch: iter_time=3.451, forward_time=0.351, loss_ctc=48.108, loss_att=44.935, acc=0.745, loss=45.887, backward_time=0.459, grad_norm=28.544, clip=100.000, loss_scale=9.178e+33, optim_step_time=0.102, optim0_lr0=2.568e-04, train_time=5.117 +[gpub010:0/16] 2024-01-29 18:30:43,572 (trainer:737) INFO: 16epoch:train:2601-2700batch: iter_time=8.533e-05, forward_time=0.325, loss_ctc=54.781, loss_att=56.214, acc=0.715, loss=55.784, backward_time=0.406, grad_norm=42.474, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.567e-04, train_time=1.580 +[gpub010:0/16] 2024-01-29 18:33:05,350 (trainer:737) INFO: 16epoch:train:2701-2800batch: iter_time=8.414e-05, forward_time=0.297, loss_ctc=55.926, loss_att=55.130, acc=0.715, loss=55.369, backward_time=0.410, grad_norm=41.404, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.566e-04, train_time=1.418 +[gpub010:0/16] 2024-01-29 18:35:52,865 (trainer:737) INFO: 16epoch:train:2801-2900batch: iter_time=8.189e-05, forward_time=0.415, loss_ctc=48.496, loss_att=52.274, acc=0.734, loss=51.141, backward_time=0.437, grad_norm=30.830, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.566e-04, train_time=1.675 +[gpub010:0/16] 2024-01-29 18:38:24,764 (trainer:737) INFO: 16epoch:train:2901-3000batch: iter_time=8.425e-05, forward_time=0.294, loss_ctc=56.327, loss_att=55.973, acc=0.749, loss=56.079, backward_time=0.406, grad_norm=31.278, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.565e-04, train_time=1.518 +[gpub010:0/16] 2024-01-29 18:40:59,634 (trainer:737) INFO: 16epoch:train:3001-3100batch: iter_time=9.236e-05, forward_time=0.298, loss_ctc=46.873, loss_att=50.569, acc=0.743, loss=49.460, backward_time=0.411, grad_norm=29.230, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.565e-04, train_time=1.550 +[gpub010:0/16] 2024-01-29 18:43:44,448 
(trainer:737) INFO: 16epoch:train:3101-3200batch: iter_time=8.158e-05, forward_time=0.357, loss_ctc=54.650, loss_att=54.115, acc=0.747, loss=54.276, backward_time=0.459, grad_norm=30.938, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.564e-04, train_time=1.648 +[gpub010:0/16] 2024-01-29 18:46:12,200 (trainer:737) INFO: 16epoch:train:3201-3300batch: iter_time=8.354e-05, forward_time=0.290, loss_ctc=49.921, loss_att=45.806, acc=0.739, loss=47.041, backward_time=0.401, grad_norm=31.596, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.564e-04, train_time=1.477 +[gpub010:0/16] 2024-01-29 18:48:50,657 (trainer:737) INFO: 16epoch:train:3301-3400batch: iter_time=1.470e-04, forward_time=0.298, loss_ctc=48.207, loss_att=47.023, acc=0.740, loss=47.378, backward_time=0.408, grad_norm=30.389, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.563e-04, train_time=1.584 +[gpub010:0/16] 2024-01-29 18:52:10,958 (trainer:737) INFO: 16epoch:train:3401-3500batch: iter_time=8.233e-05, forward_time=0.400, loss_ctc=46.305, loss_att=44.505, acc=0.742, loss=45.045, backward_time=0.444, grad_norm=29.429, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.562e-04, train_time=2.003 +[gpub010:0/16] 2024-01-29 18:54:29,836 (trainer:737) INFO: 16epoch:train:3501-3600batch: iter_time=5.447e-04, forward_time=0.304, loss_ctc=49.996, loss_att=44.594, acc=0.763, loss=46.215, backward_time=0.405, grad_norm=29.227, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.562e-04, train_time=1.389 +[gpub010:0/16] 2024-01-29 18:57:19,415 (trainer:737) INFO: 16epoch:train:3601-3700batch: iter_time=8.308e-05, forward_time=0.371, loss_ctc=54.823, loss_att=49.988, acc=0.738, loss=51.439, backward_time=0.445, grad_norm=32.849, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.561e-04, train_time=1.696 +[gpub010:0/16] 2024-01-29 18:59:09,357 (multiple_iter_factory:32) INFO: Building 
3th iter-factory... +[gpub010:0/16] 2024-01-29 18:59:28,575 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 18:59:32,200 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 18:59:32,200 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-01-29 18:59:32,203 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 19:06:46,517 (trainer:737) INFO: 16epoch:train:3701-3800batch: iter_time=3.900, forward_time=0.320, loss_ctc=55.641, loss_att=55.402, acc=0.731, loss=55.474, backward_time=0.401, grad_norm=38.193, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.561e-04, train_time=5.671 +[gpub010:0/16] 2024-01-29 19:09:16,505 (trainer:737) INFO: 16epoch:train:3801-3900batch: iter_time=8.625e-05, forward_time=0.401, loss_ctc=55.384, loss_att=50.235, acc=0.731, loss=51.779, backward_time=0.433, grad_norm=41.589, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.560e-04, train_time=1.499 +[gpub010:0/16] 2024-01-29 19:12:07,710 (trainer:737) INFO: 16epoch:train:3901-4000batch: iter_time=9.422e-05, forward_time=0.323, loss_ctc=49.049, loss_att=49.638, acc=0.718, loss=49.462, backward_time=0.404, grad_norm=34.778, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.560e-04, train_time=1.712 +[gpub010:0/16] 2024-01-29 19:14:51,411 
(trainer:737) INFO: 16epoch:train:4001-4100batch: iter_time=9.192e-05, forward_time=0.292, loss_ctc=53.104, loss_att=54.104, acc=0.739, loss=53.804, backward_time=0.406, grad_norm=36.389, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.559e-04, train_time=1.637 +[gpub010:0/16] 2024-01-29 19:17:46,428 (trainer:737) INFO: 16epoch:train:4101-4200batch: iter_time=3.808e-04, forward_time=0.380, loss_ctc=51.314, loss_att=51.957, acc=0.740, loss=51.764, backward_time=0.448, grad_norm=30.869, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.559e-04, train_time=1.748 +[gpub010:0/16] 2024-01-29 19:20:15,182 (trainer:737) INFO: 16epoch:train:4201-4300batch: iter_time=9.637e-05, forward_time=0.307, loss_ctc=54.986, loss_att=58.284, acc=0.742, loss=57.295, backward_time=0.419, grad_norm=31.436, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.558e-04, train_time=1.489 +[gpub010:0/16] 2024-01-29 19:23:30,008 (trainer:737) INFO: 16epoch:train:4301-4400batch: iter_time=0.001, forward_time=0.299, loss_ctc=51.375, loss_att=51.619, acc=0.745, loss=51.546, backward_time=0.407, grad_norm=30.039, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.557e-04, train_time=1.948 +[gpub010:0/16] 2024-01-29 19:25:56,739 (trainer:737) INFO: 16epoch:train:4401-4500batch: iter_time=9.333e-05, forward_time=0.368, loss_ctc=49.810, loss_att=49.257, acc=0.749, loss=49.423, backward_time=0.457, grad_norm=30.309, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.557e-04, train_time=1.466 +[gpub010:0/16] 2024-01-29 19:28:08,334 (trainer:737) INFO: 16epoch:train:4501-4600batch: iter_time=8.744e-05, forward_time=0.291, loss_ctc=46.949, loss_att=42.704, acc=0.745, loss=43.978, backward_time=0.404, grad_norm=31.639, clip=100.000, loss_scale=6.387e+33, optim_step_time=0.092, optim0_lr0=2.556e-04, train_time=1.316 +[gpub010:0/16] 2024-01-29 19:30:58,744 (trainer:737) INFO: 
16epoch:train:4601-4700batch: iter_time=4.068e-04, forward_time=0.299, loss_ctc=48.293, loss_att=48.836, acc=0.727, loss=48.673, backward_time=0.407, grad_norm=30.268, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.556e-04, train_time=1.704 +[gpub010:0/16] 2024-01-29 19:33:37,013 (trainer:737) INFO: 16epoch:train:4701-4800batch: iter_time=9.664e-05, forward_time=0.379, loss_ctc=49.302, loss_att=44.988, acc=0.754, loss=46.282, backward_time=0.451, grad_norm=28.366, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.555e-04, train_time=1.581 +[gpub010:0/16] 2024-01-29 19:36:19,861 (trainer:737) INFO: 16epoch:train:4801-4900batch: iter_time=8.944e-05, forward_time=0.293, loss_ctc=52.806, loss_att=50.037, acc=0.746, loss=50.868, backward_time=0.405, grad_norm=32.145, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.555e-04, train_time=1.630 +[gpub010:0/16] 2024-01-29 19:38:39,165 (trainer:737) INFO: 16epoch:train:4901-5000batch: iter_time=5.707e-04, forward_time=0.304, loss_ctc=56.873, loss_att=52.239, acc=0.736, loss=53.629, backward_time=0.407, grad_norm=40.738, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.554e-04, train_time=1.393 +[gpub010:0/16] 2024-01-29 19:38:59,440 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-01-29 19:39:18,299 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 19:39:21,914 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 19:39:21,914 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-01-29 19:39:21,917 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 19:47:10,749 (trainer:737) INFO: 16epoch:train:5001-5100batch: iter_time=3.400, forward_time=0.365, loss_ctc=47.738, loss_att=44.602, acc=0.747, loss=45.543, backward_time=0.415, grad_norm=28.575, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.554e-04, train_time=5.114 +[gpub010:0/16] 2024-01-29 19:49:44,947 (trainer:737) INFO: 16epoch:train:5101-5200batch: iter_time=8.242e-05, forward_time=0.294, loss_ctc=53.170, loss_att=54.627, acc=0.721, loss=54.190, backward_time=0.408, grad_norm=40.798, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.553e-04, train_time=1.543 +[gpub010:0/16] 2024-01-29 19:52:51,553 (trainer:737) INFO: 16epoch:train:5201-5300batch: iter_time=7.968e-05, forward_time=0.437, loss_ctc=55.619, loss_att=54.023, acc=0.722, loss=54.502, backward_time=0.427, grad_norm=37.703, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.552e-04, train_time=1.866 +[gpub010:0/16] 2024-01-29 19:55:35,648 (trainer:737) INFO: 
16epoch:train:5301-5400batch: iter_time=7.931e-05, forward_time=0.289, loss_ctc=47.722, loss_att=51.512, acc=0.738, loss=50.375, backward_time=0.406, grad_norm=30.093, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.552e-04, train_time=1.641 +[gpub010:0/16] 2024-01-29 19:58:19,681 (trainer:737) INFO: 16epoch:train:5401-5500batch: iter_time=8.376e-05, forward_time=0.304, loss_ctc=55.472, loss_att=55.855, acc=0.753, loss=55.740, backward_time=0.413, grad_norm=31.320, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.551e-04, train_time=1.640 +[gpub010:0/16] 2024-01-29 20:01:18,237 (trainer:737) INFO: 16epoch:train:5501-5600batch: iter_time=8.414e-05, forward_time=0.389, loss_ctc=46.230, loss_att=49.519, acc=0.745, loss=48.532, backward_time=0.422, grad_norm=29.128, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.551e-04, train_time=1.785 +[gpub010:0/16] 2024-01-29 20:04:09,705 (trainer:737) INFO: 16epoch:train:5601-5700batch: iter_time=8.400e-05, forward_time=0.299, loss_ctc=54.203, loss_att=53.700, acc=0.750, loss=53.851, backward_time=0.410, grad_norm=30.556, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.550e-04, train_time=1.715 +[gpub010:0/16] 2024-01-29 20:07:10,144 (trainer:737) INFO: 16epoch:train:5701-5800batch: iter_time=8.570e-05, forward_time=0.373, loss_ctc=49.250, loss_att=45.188, acc=0.742, loss=46.406, backward_time=0.433, grad_norm=31.229, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.550e-04, train_time=1.804 +[gpub010:0/16] 2024-01-29 20:09:45,069 (trainer:737) INFO: 16epoch:train:5801-5900batch: iter_time=8.121e-05, forward_time=0.300, loss_ctc=47.412, loss_att=46.093, acc=0.744, loss=46.489, backward_time=0.402, grad_norm=29.635, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.549e-04, train_time=1.548 +[gpub010:0/16] 2024-01-29 20:12:18,369 (trainer:737) INFO: 16epoch:train:5901-6000batch: 
iter_time=8.299e-05, forward_time=0.295, loss_ctc=45.238, loss_att=43.699, acc=0.746, loss=44.161, backward_time=0.410, grad_norm=28.078, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.549e-04, train_time=1.534 +[gpub010:0/16] 2024-01-29 20:15:40,629 (trainer:737) INFO: 16epoch:train:6001-6100batch: iter_time=8.475e-05, forward_time=0.403, loss_ctc=49.672, loss_att=44.671, acc=0.763, loss=46.171, backward_time=0.423, grad_norm=27.959, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.548e-04, train_time=2.022 +[gpub010:0/16] 2024-01-29 20:18:14,452 (trainer:737) INFO: 16epoch:train:6101-6200batch: iter_time=8.261e-05, forward_time=0.293, loss_ctc=54.728, loss_att=50.622, acc=0.735, loss=51.854, backward_time=0.405, grad_norm=33.644, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.547e-04, train_time=1.537 +[gpub010:0/16] 2024-01-29 20:19:45,151 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub010:0/16] 2024-01-29 20:20:04,477 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 20:20:08,133 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 20:20:08,133 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-01-29 20:20:08,136 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 20:26:13,679 
(trainer:737) INFO: 16epoch:train:6201-6300batch: iter_time=3.282, forward_time=0.295, loss_ctc=54.603, loss_att=56.273, acc=0.726, loss=55.772, backward_time=0.414, grad_norm=40.663, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.547e-04, train_time=4.793 +[gpub010:0/16] 2024-01-29 20:29:38,913 (trainer:737) INFO: 16epoch:train:6301-6400batch: iter_time=7.849e-05, forward_time=0.370, loss_ctc=55.019, loss_att=51.220, acc=0.728, loss=52.360, backward_time=0.449, grad_norm=42.235, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.546e-04, train_time=2.052 +[gpub010:0/16] 2024-01-29 20:31:59,422 (trainer:737) INFO: 16epoch:train:6401-6500batch: iter_time=7.851e-05, forward_time=0.289, loss_ctc=48.841, loss_att=50.681, acc=0.713, loss=50.129, backward_time=0.403, grad_norm=36.157, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.546e-04, train_time=1.405 +[gpub010:0/16] 2024-01-29 20:34:33,042 (trainer:737) INFO: 16epoch:train:6501-6600batch: iter_time=9.163e-05, forward_time=0.320, loss_ctc=52.712, loss_att=54.029, acc=0.740, loss=53.634, backward_time=0.442, grad_norm=36.586, clip=100.000, loss_scale=1.277e+34, optim_step_time=0.101, optim0_lr0=2.545e-04, train_time=1.536 +[gpub010:0/16] 2024-01-29 20:37:19,380 (trainer:737) INFO: 16epoch:train:6601-6700batch: iter_time=8.352e-05, forward_time=0.370, loss_ctc=51.093, loss_att=52.408, acc=0.731, loss=52.013, backward_time=0.414, grad_norm=30.817, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.092, optim0_lr0=2.545e-04, train_time=1.663 +[gpub010:0/16] 2024-01-29 20:39:57,075 (trainer:737) INFO: 16epoch:train:6701-6800batch: iter_time=4.201e-04, forward_time=0.299, loss_ctc=54.554, loss_att=58.051, acc=0.737, loss=57.002, backward_time=0.416, grad_norm=31.445, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.093, optim0_lr0=2.544e-04, train_time=1.577 +[gpub010:0/16] 2024-01-29 20:42:32,298 (trainer:737) INFO: 
16epoch:train:6801-6900batch: iter_time=9.091e-05, forward_time=0.419, loss_ctc=50.258, loss_att=49.639, acc=0.741, loss=49.825, backward_time=0.422, grad_norm=30.554, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.095, optim0_lr0=2.544e-04, train_time=1.552 +[gpub010:0/16] 2024-01-29 20:44:03,621 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 20:45:06,755 (trainer:737) INFO: 16epoch:train:6901-7000batch: iter_time=8.529e-05, forward_time=0.292, loss_ctc=49.286, loss_att=48.574, acc=0.745, loss=48.788, backward_time=0.403, grad_norm=30.501, clip=100.000, loss_scale=1.647e+34, optim_step_time=0.092, optim0_lr0=2.543e-04, train_time=1.544 +[gpub010:0/16] 2024-01-29 20:47:54,412 (trainer:737) INFO: 16epoch:train:7001-7100batch: iter_time=4.744e-04, forward_time=0.298, loss_ctc=46.610, loss_att=40.851, acc=0.743, loss=42.579, backward_time=0.404, grad_norm=30.499, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.543e-04, train_time=1.675 +[gpub010:0/16] 2024-01-29 20:50:41,704 (trainer:737) INFO: 16epoch:train:7101-7200batch: iter_time=9.138e-05, forward_time=0.398, loss_ctc=48.176, loss_att=50.513, acc=0.706, loss=49.812, backward_time=0.420, grad_norm=33.994, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.110, optim0_lr0=2.542e-04, train_time=1.674 +[gpub010:0/16] 2024-01-29 20:53:01,697 (trainer:737) INFO: 16epoch:train:7201-7300batch: iter_time=2.076e-04, forward_time=0.295, loss_ctc=48.934, loss_att=45.495, acc=0.745, loss=46.527, backward_time=0.415, grad_norm=29.934, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.541e-04, train_time=1.399 +[gpub010:0/16] 2024-01-29 20:56:13,019 (trainer:737) INFO: 16epoch:train:7301-7400batch: iter_time=9.573e-05, forward_time=0.390, loss_ctc=52.620, loss_att=50.293, acc=0.739, loss=50.991, backward_time=0.457, grad_norm=34.209, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.541e-04, 
train_time=1.913 +[gpub010:0/16] 2024-01-29 20:58:30,168 (trainer:737) INFO: 16epoch:train:7401-7500batch: iter_time=9.911e-05, forward_time=0.291, loss_ctc=56.776, loss_att=51.941, acc=0.736, loss=53.392, backward_time=0.408, grad_norm=40.844, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.540e-04, train_time=1.372 +[gpub010:0/16] 2024-01-29 20:58:50,395 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub010:0/16] 2024-01-29 20:59:09,424 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 20:59:12,963 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 20:59:12,963 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-01-29 20:59:12,966 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 21:08:09,495 (trainer:737) INFO: 16epoch:train:7501-7600batch: iter_time=3.997, forward_time=0.361, loss_ctc=47.449, loss_att=45.531, acc=0.748, loss=46.107, backward_time=0.502, grad_norm=28.794, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.540e-04, train_time=5.793 +[gpub010:0/16] 2024-01-29 21:11:26,871 (trainer:737) INFO: 16epoch:train:7601-7700batch: iter_time=7.809e-05, forward_time=0.291, loss_ctc=53.139, loss_att=54.642, acc=0.723, loss=54.191, backward_time=0.401, grad_norm=40.190, clip=100.000, loss_scale=1.038e+34, 
optim_step_time=0.092, optim0_lr0=2.539e-04, train_time=1.974 +[gpub010:0/16] 2024-01-29 21:14:13,240 (trainer:737) INFO: 16epoch:train:7701-7800batch: iter_time=7.809e-05, forward_time=0.326, loss_ctc=54.551, loss_att=53.832, acc=0.724, loss=54.048, backward_time=0.459, grad_norm=40.382, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.539e-04, train_time=1.663 +[gpub010:0/16] 2024-01-29 21:16:46,054 (trainer:737) INFO: 16epoch:train:7801-7900batch: iter_time=7.948e-05, forward_time=0.363, loss_ctc=47.495, loss_att=51.625, acc=0.738, loss=50.386, backward_time=0.413, grad_norm=30.144, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=2.538e-04, train_time=1.527 +[gpub010:0/16] 2024-01-29 21:20:06,080 (trainer:737) INFO: 16epoch:train:7901-8000batch: iter_time=9.704e-04, forward_time=0.428, loss_ctc=55.357, loss_att=55.412, acc=0.754, loss=55.396, backward_time=0.428, grad_norm=30.789, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.109, optim0_lr0=2.538e-04, train_time=2.000 +[gpub010:0/16] 2024-01-29 21:22:51,788 (trainer:737) INFO: 16epoch:train:8001-8100batch: iter_time=8.208e-05, forward_time=0.290, loss_ctc=46.156, loss_att=49.413, acc=0.748, loss=48.436, backward_time=0.403, grad_norm=31.923, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.537e-04, train_time=1.658 +[gpub010:0/16] 2024-01-29 21:25:28,540 (trainer:737) INFO: 16epoch:train:8101-8200batch: iter_time=8.352e-05, forward_time=0.433, loss_ctc=54.302, loss_att=53.580, acc=0.752, loss=53.796, backward_time=0.436, grad_norm=31.357, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.537e-04, train_time=1.567 +[gpub010:0/16] 2024-01-29 21:26:45,220 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-29 21:28:09,238 (trainer:737) INFO: 16epoch:train:8201-8300batch: iter_time=8.237e-05, forward_time=0.289, loss_ctc=48.641, loss_att=44.864, acc=0.743, loss=45.997, backward_time=0.401, grad_norm=30.655, clip=100.000, loss_scale=7.552e+33, optim_step_time=0.092, optim0_lr0=2.536e-04, train_time=1.607 +[gpub010:0/16] 2024-01-29 21:30:37,343 (trainer:737) INFO: 16epoch:train:8301-8400batch: iter_time=8.008e-05, forward_time=0.290, loss_ctc=47.011, loss_att=45.782, acc=0.745, loss=46.151, backward_time=0.402, grad_norm=31.188, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.535e-04, train_time=1.481 +[gpub010:0/16] 2024-01-29 21:33:24,694 (trainer:737) INFO: 16epoch:train:8401-8500batch: iter_time=0.001, forward_time=0.419, loss_ctc=45.529, loss_att=44.056, acc=0.746, loss=44.498, backward_time=0.424, grad_norm=28.402, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=2.535e-04, train_time=1.672 +[gpub010:0/16] 2024-01-29 21:36:09,741 (trainer:737) INFO: 16epoch:train:8501-8600batch: iter_time=8.124e-05, forward_time=0.291, loss_ctc=49.409, loss_att=44.823, acc=0.764, loss=46.199, backward_time=0.403, grad_norm=28.640, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.534e-04, train_time=1.651 +[gpub010:0/16] 2024-01-29 21:39:16,983 (trainer:737) INFO: 16epoch:train:8601-8700batch: iter_time=1.336e-04, forward_time=0.398, loss_ctc=54.257, loss_att=50.473, acc=0.737, loss=51.608, backward_time=0.444, grad_norm=32.886, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.534e-04, train_time=1.871 +[gpub010:0/16] 2024-01-29 21:40:43,358 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub010:0/16] 2024-01-29 21:41:02,624 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 21:41:06,178 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 21:41:06,178 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-01-29 21:41:06,181 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 21:47:35,298 (trainer:737) INFO: 16epoch:train:8701-8800batch: iter_time=3.328, forward_time=0.373, loss_ctc=53.095, loss_att=55.628, acc=0.726, loss=54.868, backward_time=0.476, grad_norm=39.205, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.533e-04, train_time=4.983 +[gpub010:0/16] 2024-01-29 21:50:11,398 (trainer:737) INFO: 16epoch:train:8801-8900batch: iter_time=8.056e-05, forward_time=0.288, loss_ctc=53.299, loss_att=49.152, acc=0.733, loss=50.396, backward_time=0.399, grad_norm=35.795, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.533e-04, train_time=1.560 +[gpub010:0/16] 2024-01-29 21:52:50,881 (trainer:737) INFO: 16epoch:train:8901-9000batch: iter_time=8.503e-05, forward_time=0.289, loss_ctc=48.643, loss_att=49.880, acc=0.716, loss=49.509, backward_time=0.401, grad_norm=34.522, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.532e-04, train_time=1.596 +[gpub010:0/16] 2024-01-29 21:55:37,431 (trainer:737) 
INFO: 16epoch:train:9001-9100batch: iter_time=8.064e-05, forward_time=0.414, loss_ctc=51.816, loss_att=53.235, acc=0.743, loss=52.809, backward_time=0.436, grad_norm=38.618, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.135, optim0_lr0=2.532e-04, train_time=1.665 +[gpub010:0/16] 2024-01-29 21:58:29,641 (trainer:737) INFO: 16epoch:train:9101-9200batch: iter_time=8.483e-05, forward_time=0.289, loss_ctc=50.763, loss_att=52.131, acc=0.731, loss=51.721, backward_time=0.402, grad_norm=31.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.531e-04, train_time=1.720 +[gpub010:0/16] 2024-01-29 22:01:13,373 (trainer:737) INFO: 16epoch:train:9201-9300batch: iter_time=8.146e-05, forward_time=0.430, loss_ctc=54.743, loss_att=58.177, acc=0.740, loss=57.147, backward_time=0.430, grad_norm=32.610, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=2.531e-04, train_time=1.638 +[gpub010:0/16] 2024-01-29 22:03:35,087 (trainer:737) INFO: 16epoch:train:9301-9400batch: iter_time=8.368e-05, forward_time=0.291, loss_ctc=50.227, loss_att=49.791, acc=0.741, loss=49.922, backward_time=0.403, grad_norm=31.983, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.530e-04, train_time=1.418 +[gpub010:0/16] 2024-01-29 22:06:12,649 (trainer:737) INFO: 16epoch:train:9401-9500batch: iter_time=8.535e-05, forward_time=0.290, loss_ctc=48.941, loss_att=48.318, acc=0.746, loss=48.505, backward_time=0.402, grad_norm=30.522, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.529e-04, train_time=1.574 +[gpub010:0/16] 2024-01-29 22:09:04,007 (trainer:737) INFO: 16epoch:train:9501-9600batch: iter_time=8.942e-05, forward_time=0.411, loss_ctc=46.481, loss_att=40.818, acc=0.742, loss=42.517, backward_time=0.422, grad_norm=29.707, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=2.529e-04, train_time=1.714 +[gpub010:0/16] 2024-01-29 22:11:24,728 (trainer:737) INFO: 16epoch:train:9601-9700batch: 
iter_time=8.993e-05, forward_time=0.287, loss_ctc=47.621, loss_att=49.362, acc=0.710, loss=48.839, backward_time=0.400, grad_norm=30.931, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.528e-04, train_time=1.407 +[gpub010:0/16] 2024-01-29 22:14:28,763 (trainer:737) INFO: 16epoch:train:9701-9800batch: iter_time=9.064e-05, forward_time=0.375, loss_ctc=48.306, loss_att=44.875, acc=0.748, loss=45.904, backward_time=0.485, grad_norm=28.930, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.528e-04, train_time=1.840 +[gpub010:0/16] 2024-01-29 22:17:07,208 (trainer:737) INFO: 16epoch:train:9801-9900batch: iter_time=8.785e-05, forward_time=0.291, loss_ctc=52.345, loss_att=49.909, acc=0.737, loss=50.640, backward_time=0.402, grad_norm=34.093, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.527e-04, train_time=1.584 +[gpub010:0/16] 2024-01-29 22:19:33,473 (trainer:737) INFO: 16epoch:train:9901-10000batch: iter_time=8.496e-05, forward_time=0.290, loss_ctc=55.634, loss_att=52.928, acc=0.734, loss=53.740, backward_time=0.401, grad_norm=38.083, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.527e-04, train_time=1.463 +[gpub010:0/16] 2024-01-29 22:19:53,502 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub010:0/16] 2024-01-29 22:20:13,320 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 22:20:17,228 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 22:20:17,228 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-01-29 22:20:17,231 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 22:29:13,439 (trainer:737) INFO: 16epoch:train:10001-10100batch: iter_time=3.739, forward_time=0.435, loss_ctc=46.907, loss_att=43.524, acc=0.745, loss=44.539, backward_time=0.424, grad_norm=28.177, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.526e-04, train_time=5.799 +[gpub010:0/16] 2024-01-29 22:32:22,889 (trainer:737) INFO: 16epoch:train:10101-10200batch: iter_time=8.453e-05, forward_time=0.380, loss_ctc=52.600, loss_att=52.857, acc=0.722, loss=52.780, backward_time=0.460, grad_norm=42.632, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.526e-04, train_time=1.895 +[gpub010:0/16] 2024-01-29 22:34:52,684 (trainer:737) INFO: 16epoch:train:10201-10300batch: iter_time=9.037e-05, forward_time=0.291, loss_ctc=54.443, loss_att=53.517, acc=0.721, loss=53.795, backward_time=0.402, grad_norm=40.752, clip=100.000, loss_scale=7.996e+33, optim_step_time=0.092, optim0_lr0=2.525e-04, train_time=1.498 +[gpub010:0/16] 2024-01-29 22:38:10,177 (trainer:737) 
INFO: 16epoch:train:10301-10400batch: iter_time=9.344e-05, forward_time=0.287, loss_ctc=47.034, loss_att=50.711, acc=0.734, loss=49.608, backward_time=0.399, grad_norm=30.796, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.525e-04, train_time=1.975 +[gpub010:0/16] 2024-01-29 22:41:23,548 (trainer:737) INFO: 16epoch:train:10401-10500batch: iter_time=9.843e-05, forward_time=0.451, loss_ctc=55.171, loss_att=55.468, acc=0.743, loss=55.379, backward_time=0.427, grad_norm=31.355, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=2.524e-04, train_time=1.934 +[gpub010:0/16] 2024-01-29 22:44:03,535 (trainer:737) INFO: 16epoch:train:10501-10600batch: iter_time=1.008e-04, forward_time=0.289, loss_ctc=45.826, loss_att=47.347, acc=0.743, loss=46.890, backward_time=0.401, grad_norm=29.435, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.524e-04, train_time=1.600 +[gpub010:0/16] 2024-01-29 22:47:49,603 (trainer:737) INFO: 16epoch:train:10601-10700batch: iter_time=8.881e-05, forward_time=0.444, loss_ctc=53.276, loss_att=52.948, acc=0.748, loss=53.047, backward_time=0.443, grad_norm=29.982, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.523e-04, train_time=2.260 +[gpub010:0/16] 2024-01-29 22:50:01,356 (trainer:737) INFO: 16epoch:train:10701-10800batch: iter_time=8.356e-05, forward_time=0.288, loss_ctc=48.160, loss_att=44.472, acc=0.740, loss=45.579, backward_time=0.401, grad_norm=30.442, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.523e-04, train_time=1.318 +[gpub010:0/16] 2024-01-29 22:53:02,329 (trainer:737) INFO: 16epoch:train:10801-10900batch: iter_time=8.627e-05, forward_time=0.287, loss_ctc=47.109, loss_att=44.709, acc=0.734, loss=45.429, backward_time=0.401, grad_norm=30.974, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.522e-04, train_time=1.810 +[gpub010:0/16] 2024-01-29 22:55:33,584 (trainer:737) INFO: 
16epoch:train:10901-11000batch: iter_time=2.453e-04, forward_time=0.336, loss_ctc=45.288, loss_att=44.327, acc=0.731, loss=44.616, backward_time=0.485, grad_norm=29.441, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.521e-04, train_time=1.512 +[gpub010:0/16] 2024-01-29 22:58:06,095 (trainer:737) INFO: 16epoch:train:11001-11100batch: iter_time=8.849e-05, forward_time=0.289, loss_ctc=49.201, loss_att=44.931, acc=0.751, loss=46.212, backward_time=0.402, grad_norm=30.325, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.521e-04, train_time=1.525 +[gpub010:0/16] 2024-01-29 23:01:20,705 (trainer:737) INFO: 16epoch:train:11101-11200batch: iter_time=0.001, forward_time=0.341, loss_ctc=54.698, loss_att=49.778, acc=0.740, loss=51.254, backward_time=0.482, grad_norm=34.150, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.520e-04, train_time=1.945 +[gpub010:0/16] 2024-01-29 23:02:53,268 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-01-29 23:03:12,718 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 23:03:16,718 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 23:03:16,718 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-01-29 23:03:16,721 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 23:09:18,297 (trainer:737) INFO: 16epoch:train:11201-11300batch: iter_time=3.325, forward_time=0.288, loss_ctc=53.014, loss_att=54.433, acc=0.729, loss=54.007, backward_time=0.402, grad_norm=36.584, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.520e-04, train_time=4.777 +[gpub010:0/16] 2024-01-29 23:12:11,198 (trainer:737) INFO: 16epoch:train:11301-11400batch: iter_time=8.368e-05, forward_time=0.410, loss_ctc=53.467, loss_att=49.088, acc=0.735, loss=50.402, backward_time=0.432, grad_norm=41.107, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.110, optim0_lr0=2.519e-04, train_time=1.728 +[gpub010:0/16] 2024-01-29 23:15:02,906 (trainer:737) INFO: 16epoch:train:11401-11500batch: iter_time=8.295e-05, forward_time=0.289, loss_ctc=47.932, loss_att=48.552, acc=0.719, loss=48.366, backward_time=0.401, grad_norm=34.818, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.519e-04, train_time=1.717 +[gpub010:0/16] 2024-01-29 23:17:15,253 (trainer:737) 
INFO: 16epoch:train:11501-11600batch: iter_time=8.508e-05, forward_time=0.290, loss_ctc=51.122, loss_att=52.099, acc=0.745, loss=51.806, backward_time=0.403, grad_norm=37.257, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.518e-04, train_time=1.323 +[gpub010:0/16] 2024-01-29 23:20:00,030 (trainer:737) INFO: 16epoch:train:11601-11700batch: iter_time=8.443e-05, forward_time=0.412, loss_ctc=50.998, loss_att=51.522, acc=0.732, loss=51.364, backward_time=0.430, grad_norm=31.096, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.518e-04, train_time=1.647 +[gpub010:0/16] 2024-01-29 23:22:41,486 (trainer:737) INFO: 16epoch:train:11701-11800batch: iter_time=8.333e-05, forward_time=0.293, loss_ctc=54.313, loss_att=57.490, acc=0.742, loss=56.537, backward_time=0.405, grad_norm=31.385, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.517e-04, train_time=1.615 +[gpub010:0/16] 2024-01-29 23:26:24,339 (trainer:737) INFO: 16epoch:train:11801-11900batch: iter_time=0.342, forward_time=0.426, loss_ctc=50.007, loss_att=48.585, acc=0.744, loss=49.012, backward_time=0.447, grad_norm=30.763, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.517e-04, train_time=2.228 +[gpub010:0/16] 2024-01-29 23:28:47,854 (trainer:737) INFO: 16epoch:train:11901-12000batch: iter_time=8.720e-05, forward_time=0.291, loss_ctc=49.137, loss_att=48.360, acc=0.747, loss=48.593, backward_time=0.402, grad_norm=30.717, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.516e-04, train_time=1.434 +[gpub010:0/16] 2024-01-29 23:31:24,417 (trainer:737) INFO: 16epoch:train:12001-12100batch: iter_time=8.656e-05, forward_time=0.288, loss_ctc=45.944, loss_att=40.258, acc=0.746, loss=41.964, backward_time=0.399, grad_norm=34.933, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.516e-04, train_time=1.566 +[gpub010:0/16] 2024-01-29 23:33:58,490 (trainer:737) INFO: 
16epoch:train:12101-12200batch: iter_time=8.616e-05, forward_time=0.439, loss_ctc=47.828, loss_att=48.636, acc=0.714, loss=48.394, backward_time=0.433, grad_norm=31.993, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.515e-04, train_time=1.541 +[gpub010:0/16] 2024-01-29 23:36:25,903 (trainer:737) INFO: 16epoch:train:12201-12300batch: iter_time=8.346e-05, forward_time=0.290, loss_ctc=48.906, loss_att=44.983, acc=0.747, loss=46.160, backward_time=0.403, grad_norm=29.765, clip=100.000, loss_scale=1.599e+34, optim_step_time=0.092, optim0_lr0=2.515e-04, train_time=1.472 +[gpub010:0/16] 2024-01-29 23:37:42,119 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-29 23:39:29,731 (trainer:737) INFO: 16epoch:train:12301-12400batch: iter_time=8.505e-05, forward_time=0.291, loss_ctc=52.220, loss_att=49.441, acc=0.742, loss=50.275, backward_time=0.402, grad_norm=32.620, clip=100.000, loss_scale=1.406e+34, optim_step_time=0.092, optim0_lr0=2.514e-04, train_time=1.840 +[gpub010:0/16] 2024-01-29 23:41:56,477 (trainer:737) INFO: 16epoch:train:12401-12500batch: iter_time=8.252e-05, forward_time=0.362, loss_ctc=55.432, loss_att=52.267, acc=0.736, loss=53.216, backward_time=0.461, grad_norm=39.465, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.513e-04, train_time=1.467 +[gpub010:0/16] 2024-01-29 23:42:16,506 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-01-29 23:42:35,879 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-29 23:42:39,836 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-29 23:42:39,836 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-01-29 23:42:39,840 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-29 23:49:54,618 (trainer:737) INFO: 16epoch:train:12501-12600batch: iter_time=3.126, forward_time=0.288, loss_ctc=46.795, loss_att=46.791, acc=0.748, loss=46.792, backward_time=0.401, grad_norm=28.116, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.513e-04, train_time=4.781 +[gpub010:0/16] 2024-01-29 23:53:21,776 (trainer:737) INFO: 16epoch:train:12601-12700batch: iter_time=8.689e-05, forward_time=0.352, loss_ctc=52.383, loss_att=54.511, acc=0.726, loss=53.873, backward_time=0.487, grad_norm=41.848, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.512e-04, train_time=2.071 +[gpub010:0/16] 2024-01-29 23:56:27,844 (trainer:737) INFO: 16epoch:train:12701-12800batch: iter_time=8.679e-05, forward_time=0.290, loss_ctc=53.033, loss_att=53.451, acc=0.725, loss=53.326, backward_time=0.402, grad_norm=38.651, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.512e-04, train_time=1.861 +[gpub010:0/16] 2024-01-29 23:59:18,579 (trainer:737) 
INFO: 16epoch:train:12801-12900batch: iter_time=9.451e-05, forward_time=0.399, loss_ctc=47.532, loss_att=52.311, acc=0.738, loss=50.877, backward_time=0.438, grad_norm=30.444, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.511e-04, train_time=1.706 +[gpub010:0/16] 2024-01-29 23:59:31,660 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-30 00:02:08,542 (trainer:737) INFO: 16epoch:train:12901-13000batch: iter_time=1.087e-04, forward_time=0.292, loss_ctc=54.975, loss_att=55.637, acc=0.754, loss=55.439, backward_time=0.406, grad_norm=30.450, clip=100.000, loss_scale=5.664e+33, optim_step_time=0.092, optim0_lr0=2.511e-04, train_time=1.699 +[gpub010:0/16] 2024-01-30 00:04:59,090 (trainer:737) INFO: 16epoch:train:13001-13100batch: iter_time=1.032e-04, forward_time=0.369, loss_ctc=45.656, loss_att=49.372, acc=0.747, loss=48.257, backward_time=0.463, grad_norm=28.681, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.510e-04, train_time=1.706 +[gpub010:0/16] 2024-01-30 00:08:13,501 (trainer:737) INFO: 16epoch:train:13101-13200batch: iter_time=1.048e-04, forward_time=0.293, loss_ctc=53.295, loss_att=53.374, acc=0.753, loss=53.350, backward_time=0.404, grad_norm=30.088, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.510e-04, train_time=1.944 +[gpub010:0/16] 2024-01-30 00:11:13,492 (trainer:737) INFO: 16epoch:train:13201-13300batch: iter_time=1.055e-04, forward_time=0.287, loss_ctc=48.404, loss_att=45.007, acc=0.744, loss=46.026, backward_time=0.398, grad_norm=30.449, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.509e-04, train_time=1.799 +[gpub010:0/16] 2024-01-30 00:14:06,935 (trainer:737) INFO: 16epoch:train:13301-13400batch: iter_time=1.025e-04, forward_time=0.354, loss_ctc=47.959, loss_att=46.781, acc=0.745, loss=47.134, backward_time=0.483, grad_norm=31.015, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, 
optim0_lr0=2.509e-04, train_time=1.735 +[gpub010:0/16] 2024-01-30 00:17:19,323 (trainer:737) INFO: 16epoch:train:13401-13500batch: iter_time=9.805e-05, forward_time=0.289, loss_ctc=45.111, loss_att=43.949, acc=0.748, loss=44.298, backward_time=0.399, grad_norm=29.410, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.508e-04, train_time=1.923 +[gpub010:0/16] 2024-01-30 00:20:41,961 (trainer:737) INFO: 16epoch:train:13501-13600batch: iter_time=9.660e-05, forward_time=0.384, loss_ctc=49.071, loss_att=44.723, acc=0.765, loss=46.028, backward_time=0.444, grad_norm=28.912, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.508e-04, train_time=2.026 +[gpub010:0/16] 2024-01-30 00:23:49,532 (trainer:737) INFO: 16epoch:train:13601-13700batch: iter_time=9.176e-05, forward_time=0.290, loss_ctc=54.003, loss_att=49.898, acc=0.739, loss=51.130, backward_time=0.403, grad_norm=32.868, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.507e-04, train_time=1.876 +[gpub010:0/16] 2024-01-30 00:25:47,597 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-01-30 00:26:07,030 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 00:26:10,635 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 00:26:10,635 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-01-30 00:26:10,638 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 00:33:31,796 (trainer:737) INFO: 16epoch:train:13701-13800batch: iter_time=3.267, forward_time=0.377, loss_ctc=52.375, loss_att=55.601, acc=0.729, loss=54.633, backward_time=0.415, grad_norm=41.127, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.507e-04, train_time=5.822 +[gpub010:0/16] 2024-01-30 00:37:03,995 (trainer:737) INFO: 16epoch:train:13801-13900batch: iter_time=1.064e-04, forward_time=0.287, loss_ctc=53.440, loss_att=48.645, acc=0.738, loss=50.084, backward_time=0.398, grad_norm=39.747, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.506e-04, train_time=2.122 +[gpub010:0/16] 2024-01-30 00:39:58,362 (trainer:737) INFO: 16epoch:train:13901-14000batch: iter_time=8.864e-05, forward_time=0.352, loss_ctc=47.691, loss_att=48.993, acc=0.719, loss=48.602, backward_time=0.430, grad_norm=34.728, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.506e-04, train_time=1.743 +[gpub010:0/16] 2024-01-30 00:43:14,160 (trainer:737) 
INFO: 16epoch:train:14001-14100batch: iter_time=8.869e-05, forward_time=0.289, loss_ctc=51.673, loss_att=52.853, acc=0.744, loss=52.499, backward_time=0.399, grad_norm=35.763, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.505e-04, train_time=1.958 +[gpub010:0/16] 2024-01-30 00:46:42,661 (trainer:737) INFO: 16epoch:train:14101-14200batch: iter_time=9.764e-05, forward_time=0.289, loss_ctc=50.513, loss_att=51.693, acc=0.734, loss=51.339, backward_time=0.398, grad_norm=31.823, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.505e-04, train_time=2.085 +[gpub010:0/16] 2024-01-30 00:49:57,320 (trainer:737) INFO: 16epoch:train:14201-14300batch: iter_time=8.734e-05, forward_time=0.359, loss_ctc=53.693, loss_att=57.218, acc=0.743, loss=56.161, backward_time=0.442, grad_norm=31.530, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.504e-04, train_time=1.946 +[gpub010:0/16] 2024-01-30 00:52:53,550 (trainer:737) INFO: 16epoch:train:14301-14400batch: iter_time=8.506e-05, forward_time=0.290, loss_ctc=49.679, loss_att=49.119, acc=0.742, loss=49.287, backward_time=0.402, grad_norm=52.419, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.503e-04, train_time=1.762 +[gpub010:0/16] 2024-01-30 00:54:04,042 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-30 00:56:17,344 (trainer:737) INFO: 16epoch:train:14401-14500batch: iter_time=9.525e-05, forward_time=0.359, loss_ctc=48.331, loss_att=48.176, acc=0.748, loss=48.223, backward_time=0.454, grad_norm=30.196, clip=100.000, loss_scale=3.383e+33, optim_step_time=0.098, optim0_lr0=2.503e-04, train_time=2.038 +[gpub010:0/16] 2024-01-30 00:59:19,919 (trainer:737) INFO: 16epoch:train:14501-14600batch: iter_time=9.155e-05, forward_time=0.287, loss_ctc=46.724, loss_att=40.865, acc=0.744, loss=42.623, backward_time=0.396, grad_norm=31.414, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.502e-04, train_time=1.825 +[gpub010:0/16] 2024-01-30 01:02:13,352 (trainer:737) INFO: 16epoch:train:14601-14700batch: iter_time=1.037e-04, forward_time=0.308, loss_ctc=47.306, loss_att=48.730, acc=0.714, loss=48.303, backward_time=0.412, grad_norm=31.042, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.502e-04, train_time=1.734 +[gpub010:0/16] 2024-01-30 01:05:54,023 (trainer:737) INFO: 16epoch:train:14701-14800batch: iter_time=9.509e-05, forward_time=0.351, loss_ctc=47.964, loss_att=44.530, acc=0.749, loss=45.560, backward_time=0.417, grad_norm=29.420, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.501e-04, train_time=2.207 +[gpub010:0/16] 2024-01-30 01:09:24,018 (trainer:737) INFO: 16epoch:train:14801-14900batch: iter_time=8.918e-05, forward_time=0.289, loss_ctc=51.662, loss_att=48.524, acc=0.743, loss=49.465, backward_time=0.401, grad_norm=32.825, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.501e-04, train_time=2.100 +[gpub010:0/16] 2024-01-30 01:12:28,880 (trainer:737) INFO: 16epoch:train:14901-15000batch: iter_time=1.027e-04, forward_time=0.358, loss_ctc=55.337, loss_att=52.730, acc=0.735, loss=53.512, backward_time=0.427, grad_norm=38.426, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.500e-04, train_time=1.848 +[gpub010:0/16] 2024-01-30 
01:49:57,446 (trainer:343) INFO: 16epoch results: [train] iter_time=0.283, forward_time=0.333, loss_ctc=51.076, loss_att=50.076, acc=0.738, loss=50.376, backward_time=0.421, grad_norm=33.323, clip=100.000, loss_scale=8.600e+33, optim_step_time=0.096, optim0_lr0=2.540e-04, train_time=1.980, time=8 hours, 15 minutes and 23.83 seconds, total_count=270000, gpu_max_cached_mem_GB=42.072, [valid] loss_ctc=43.657, cer_ctc=0.222, loss_att=46.324, acc=0.640, cer=0.314, wer=0.997, loss=45.524, time=37 minutes and 3.45 seconds, total_count=84078, gpu_max_cached_mem_GB=42.072 +[gpub010:0/16] 2024-01-30 01:50:07,991 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-01-30 01:50:08,023 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/11epoch.pth +[gpub010:0/16] 2024-01-30 01:50:08,024 (trainer:272) INFO: 17/45epoch started. Estimated time to finish: 1 week, 3 days and 11 hours +[gpub010:0/16] 2024-01-30 01:50:08,034 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-01-30 01:50:26,414 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 01:50:29,892 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 01:50:29,892 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-01-30 01:50:29,895 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 01:57:22,221 (trainer:737) INFO: 17epoch:train:1-100batch: iter_time=2.807, forward_time=0.323, loss_ctc=50.340, loss_att=51.072, acc=0.719, loss=50.852, backward_time=0.416, grad_norm=37.879, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.500e-04, train_time=4.342 +[gpub010:0/16] 2024-01-30 01:59:44,522 (trainer:737) INFO: 17epoch:train:101-200batch: iter_time=9.544e-05, forward_time=0.315, loss_ctc=56.409, loss_att=51.014, acc=0.719, loss=52.633, backward_time=0.415, grad_norm=36.064, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=2.499e-04, train_time=1.421 +[gpub010:0/16] 2024-01-30 02:02:10,566 (trainer:737) INFO: 17epoch:train:201-300batch: iter_time=1.097e-04, forward_time=0.291, loss_ctc=52.957, loss_att=56.336, acc=0.732, loss=55.322, backward_time=0.407, grad_norm=36.214, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.499e-04, train_time=1.461 +[gpub010:0/16] 2024-01-30 02:04:47,850 (trainer:737) INFO: 
17epoch:train:301-400batch: iter_time=1.022e-04, forward_time=0.326, loss_ctc=56.906, loss_att=55.198, acc=0.728, loss=55.710, backward_time=0.423, grad_norm=34.708, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.498e-04, train_time=1.573 +[gpub010:0/16] 2024-01-30 02:07:07,569 (trainer:737) INFO: 17epoch:train:401-500batch: iter_time=9.814e-05, forward_time=0.324, loss_ctc=49.707, loss_att=45.252, acc=0.722, loss=46.589, backward_time=0.420, grad_norm=32.758, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=2.498e-04, train_time=1.396 +[gpub010:0/16] 2024-01-30 02:09:28,743 (trainer:737) INFO: 17epoch:train:501-600batch: iter_time=1.038e-04, forward_time=0.291, loss_ctc=56.214, loss_att=49.748, acc=0.752, loss=51.687, backward_time=0.408, grad_norm=38.655, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.497e-04, train_time=1.412 +[gpub010:0/16] 2024-01-30 02:11:59,091 (trainer:737) INFO: 17epoch:train:601-700batch: iter_time=1.021e-04, forward_time=0.328, loss_ctc=53.762, loss_att=51.113, acc=0.729, loss=51.907, backward_time=0.432, grad_norm=34.161, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.497e-04, train_time=1.504 +[gpub010:0/16] 2024-01-30 02:14:31,159 (trainer:737) INFO: 17epoch:train:701-800batch: iter_time=1.038e-04, forward_time=0.337, loss_ctc=50.855, loss_att=42.557, acc=0.748, loss=45.046, backward_time=0.417, grad_norm=33.762, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=2.496e-04, train_time=1.520 +[gpub010:0/16] 2024-01-30 02:16:53,317 (trainer:737) INFO: 17epoch:train:801-900batch: iter_time=8.468e-05, forward_time=0.297, loss_ctc=52.382, loss_att=54.872, acc=0.742, loss=54.125, backward_time=0.422, grad_norm=31.091, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=2.496e-04, train_time=1.422 +[gpub010:0/16] 2024-01-30 02:19:20,784 (trainer:737) INFO: 17epoch:train:901-1000batch: iter_time=1.011e-04, 
forward_time=0.353, loss_ctc=47.520, loss_att=43.415, acc=0.749, loss=44.646, backward_time=0.423, grad_norm=32.135, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=2.495e-04, train_time=1.475 +[gpub010:0/16] 2024-01-30 02:21:59,424 (trainer:737) INFO: 17epoch:train:1001-1100batch: iter_time=1.103e-04, forward_time=0.302, loss_ctc=51.207, loss_att=49.146, acc=0.733, loss=49.764, backward_time=0.400, grad_norm=32.992, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=2.495e-04, train_time=1.585 +[gpub010:0/16] 2024-01-30 02:24:27,956 (trainer:737) INFO: 17epoch:train:1101-1200batch: iter_time=1.026e-04, forward_time=0.318, loss_ctc=62.181, loss_att=58.742, acc=0.727, loss=59.774, backward_time=0.443, grad_norm=45.228, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=2.494e-04, train_time=1.486 +[gpub010:0/16] 2024-01-30 02:25:56,663 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub010:0/16] 2024-01-30 02:26:15,697 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 02:26:19,294 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 02:26:19,294 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-01-30 02:26:19,298 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 02:32:00,356 (trainer:737) INFO: 
17epoch:train:1201-1300batch: iter_time=3.051, forward_time=0.343, loss_ctc=51.328, loss_att=47.665, acc=0.744, loss=48.764, backward_time=0.410, grad_norm=34.876, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=2.494e-04, train_time=4.524 +[gpub010:0/16] 2024-01-30 02:34:28,802 (trainer:737) INFO: 17epoch:train:1301-1400batch: iter_time=7.988e-05, forward_time=0.286, loss_ctc=51.717, loss_att=50.374, acc=0.698, loss=50.776, backward_time=0.398, grad_norm=36.452, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=2.493e-04, train_time=1.483 +[gpub010:0/16] 2024-01-30 02:37:12,765 (trainer:737) INFO: 17epoch:train:1401-1500batch: iter_time=7.879e-05, forward_time=0.316, loss_ctc=56.072, loss_att=60.231, acc=0.713, loss=58.983, backward_time=0.438, grad_norm=38.603, clip=100.000, loss_scale=4.387e+33, optim_step_time=0.095, optim0_lr0=2.493e-04, train_time=1.640 +[gpub010:0/16] 2024-01-30 02:39:33,633 (trainer:737) INFO: 17epoch:train:1501-1600batch: iter_time=8.438e-05, forward_time=0.341, loss_ctc=50.738, loss_att=48.791, acc=0.743, loss=49.375, backward_time=0.420, grad_norm=32.022, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.492e-04, train_time=1.409 +[gpub010:0/16] 2024-01-30 02:42:07,633 (trainer:737) INFO: 17epoch:train:1601-1700batch: iter_time=8.906e-05, forward_time=0.289, loss_ctc=51.227, loss_att=48.508, acc=0.713, loss=49.324, backward_time=0.406, grad_norm=34.176, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.492e-04, train_time=1.539 +[gpub010:0/16] 2024-01-30 02:44:41,331 (trainer:737) INFO: 17epoch:train:1701-1800batch: iter_time=8.050e-05, forward_time=0.331, loss_ctc=50.563, loss_att=43.805, acc=0.750, loss=45.832, backward_time=0.420, grad_norm=31.903, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.491e-04, train_time=1.537 +[gpub010:0/16] 2024-01-30 02:47:20,872 (trainer:737) INFO: 17epoch:train:1801-1900batch: 
iter_time=8.203e-05, forward_time=0.336, loss_ctc=55.728, loss_att=48.591, acc=0.732, loss=50.732, backward_time=0.413, grad_norm=45.122, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.491e-04, train_time=1.596 +[gpub010:0/16] 2024-01-30 02:49:55,282 (trainer:737) INFO: 17epoch:train:1901-2000batch: iter_time=4.852e-04, forward_time=0.301, loss_ctc=49.132, loss_att=48.246, acc=0.718, loss=48.512, backward_time=0.406, grad_norm=33.375, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.490e-04, train_time=1.543 +[gpub010:0/16] 2024-01-30 02:52:20,027 (trainer:737) INFO: 17epoch:train:2001-2100batch: iter_time=8.497e-05, forward_time=0.331, loss_ctc=52.272, loss_att=45.368, acc=0.754, loss=47.439, backward_time=0.434, grad_norm=30.678, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.489e-04, train_time=1.447 +[gpub010:0/16] 2024-01-30 02:55:04,271 (trainer:737) INFO: 17epoch:train:2101-2200batch: iter_time=8.137e-05, forward_time=0.316, loss_ctc=49.356, loss_att=52.533, acc=0.723, loss=51.580, backward_time=0.411, grad_norm=32.220, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.489e-04, train_time=1.643 +[gpub010:0/16] 2024-01-30 02:57:25,630 (trainer:737) INFO: 17epoch:train:2201-2300batch: iter_time=4.086e-04, forward_time=0.321, loss_ctc=45.318, loss_att=40.536, acc=0.753, loss=41.971, backward_time=0.426, grad_norm=29.660, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.488e-04, train_time=1.413 +[gpub010:0/16] 2024-01-30 02:59:59,549 (trainer:737) INFO: 17epoch:train:2301-2400batch: iter_time=8.888e-05, forward_time=0.345, loss_ctc=64.090, loss_att=64.417, acc=0.689, loss=64.319, backward_time=0.416, grad_norm=45.686, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.488e-04, train_time=1.539 +[gpub010:0/16] 2024-01-30 03:02:38,384 (trainer:737) INFO: 17epoch:train:2401-2500batch: iter_time=8.973e-05, forward_time=0.291, 
loss_ctc=52.552, loss_att=42.806, acc=0.760, loss=45.730, backward_time=0.407, grad_norm=32.253, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.487e-04, train_time=1.589 +[gpub010:0/16] 2024-01-30 03:02:58,542 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub010:0/16] 2024-01-30 03:03:17,653 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 03:03:21,163 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 03:03:21,163 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-01-30 03:03:21,167 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 03:13:10,182 (trainer:737) INFO: 17epoch:train:2501-2600batch: iter_time=3.053, forward_time=0.342, loss_ctc=47.470, loss_att=48.278, acc=0.712, loss=48.036, backward_time=0.405, grad_norm=33.773, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.487e-04, train_time=6.317 +[gpub010:0/16] 2024-01-30 03:15:58,792 (trainer:737) INFO: 17epoch:train:2601-2700batch: iter_time=8.546e-05, forward_time=0.321, loss_ctc=53.639, loss_att=49.658, acc=0.721, loss=50.852, backward_time=0.403, grad_norm=34.631, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.486e-04, train_time=1.687 +[gpub010:0/16] 2024-01-30 03:18:39,143 (trainer:737) INFO: 
17epoch:train:2701-2800batch: iter_time=8.819e-05, forward_time=0.291, loss_ctc=50.927, loss_att=54.248, acc=0.733, loss=53.252, backward_time=0.407, grad_norm=31.740, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.486e-04, train_time=1.603 +[gpub010:0/16] 2024-01-30 03:21:33,326 (trainer:737) INFO: 17epoch:train:2801-2900batch: iter_time=8.574e-05, forward_time=0.350, loss_ctc=55.032, loss_att=54.038, acc=0.725, loss=54.336, backward_time=0.442, grad_norm=35.842, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.485e-04, train_time=1.741 +[gpub010:0/16] 2024-01-30 03:24:07,305 (trainer:737) INFO: 17epoch:train:2901-3000batch: iter_time=9.069e-05, forward_time=0.306, loss_ctc=47.875, loss_att=44.703, acc=0.717, loss=45.655, backward_time=0.406, grad_norm=33.477, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.485e-04, train_time=1.540 +[gpub010:0/16] 2024-01-30 03:26:48,793 (trainer:737) INFO: 17epoch:train:3001-3100batch: iter_time=8.428e-05, forward_time=0.338, loss_ctc=52.657, loss_att=47.334, acc=0.752, loss=48.931, backward_time=0.455, grad_norm=36.935, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.484e-04, train_time=1.615 +[gpub010:0/16] 2024-01-30 03:29:40,917 (trainer:737) INFO: 17epoch:train:3101-3200batch: iter_time=8.992e-05, forward_time=0.290, loss_ctc=52.479, loss_att=48.513, acc=0.736, loss=49.703, backward_time=0.401, grad_norm=32.149, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.484e-04, train_time=1.720 +[gpub010:0/16] 2024-01-30 03:32:16,602 (trainer:737) INFO: 17epoch:train:3201-3300batch: iter_time=9.040e-05, forward_time=0.313, loss_ctc=49.183, loss_att=42.384, acc=0.743, loss=44.423, backward_time=0.404, grad_norm=34.835, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.483e-04, train_time=1.557 +[gpub010:0/16] 2024-01-30 03:35:01,339 (trainer:737) INFO: 17epoch:train:3301-3400batch: 
iter_time=2.826e-04, forward_time=0.351, loss_ctc=51.735, loss_att=54.236, acc=0.733, loss=53.486, backward_time=0.458, grad_norm=30.636, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.483e-04, train_time=1.648 +[gpub010:0/16] 2024-01-30 03:37:54,316 (trainer:737) INFO: 17epoch:train:3401-3500batch: iter_time=9.129e-05, forward_time=0.303, loss_ctc=46.444, loss_att=42.331, acc=0.746, loss=43.565, backward_time=0.411, grad_norm=31.211, clip=100.000, loss_scale=8.775e+33, optim_step_time=0.093, optim0_lr0=2.482e-04, train_time=1.729 +[gpub010:0/16] 2024-01-30 03:40:31,855 (trainer:737) INFO: 17epoch:train:3501-3600batch: iter_time=8.487e-05, forward_time=0.287, loss_ctc=50.250, loss_att=48.475, acc=0.731, loss=49.008, backward_time=0.399, grad_norm=33.129, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.482e-04, train_time=1.576 +[gpub010:0/16] 2024-01-30 03:43:17,453 (trainer:737) INFO: 17epoch:train:3601-3700batch: iter_time=4.530e-04, forward_time=0.361, loss_ctc=60.595, loss_att=57.329, acc=0.714, loss=58.309, backward_time=0.463, grad_norm=43.628, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.481e-04, train_time=1.655 +[gpub010:0/16] 2024-01-30 03:44:56,802 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-01-30 03:45:15,504 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 03:45:19,019 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 03:45:19,019 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-01-30 03:45:19,022 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 03:51:09,069 (trainer:737) INFO: 17epoch:train:3701-3800batch: iter_time=3.064, forward_time=0.318, loss_ctc=48.852, loss_att=45.236, acc=0.755, loss=46.321, backward_time=0.404, grad_norm=33.157, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.481e-04, train_time=4.716 +[gpub010:0/16] 2024-01-30 03:53:36,205 (trainer:737) INFO: 17epoch:train:3801-3900batch: iter_time=8.472e-05, forward_time=0.289, loss_ctc=51.057, loss_att=50.364, acc=0.712, loss=50.572, backward_time=0.406, grad_norm=35.005, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.480e-04, train_time=1.471 +[gpub010:0/16] 2024-01-30 03:56:21,187 (trainer:737) INFO: 17epoch:train:3901-4000batch: iter_time=5.486e-04, forward_time=0.408, loss_ctc=54.336, loss_att=60.517, acc=0.722, loss=58.663, backward_time=0.433, grad_norm=36.156, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=2.480e-04, train_time=1.649 +[gpub010:0/16] 2024-01-30 03:59:02,217 (trainer:737) INFO: 
17epoch:train:4001-4100batch: iter_time=8.329e-05, forward_time=0.291, loss_ctc=49.315, loss_att=47.468, acc=0.757, loss=48.022, backward_time=0.401, grad_norm=29.695, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.479e-04, train_time=1.610 +[gpub010:0/16] 2024-01-30 04:01:29,617 (trainer:737) INFO: 17epoch:train:4101-4200batch: iter_time=9.096e-05, forward_time=0.289, loss_ctc=50.188, loss_att=48.277, acc=0.723, loss=48.850, backward_time=0.407, grad_norm=31.474, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.479e-04, train_time=1.474 +[gpub010:0/16] 2024-01-30 04:04:21,163 (trainer:737) INFO: 17epoch:train:4201-4300batch: iter_time=3.848e-04, forward_time=0.354, loss_ctc=49.742, loss_att=42.961, acc=0.764, loss=44.996, backward_time=0.463, grad_norm=30.241, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=2.478e-04, train_time=1.715 +[gpub010:0/16] 2024-01-30 04:06:37,557 (trainer:737) INFO: 17epoch:train:4301-4400batch: iter_time=8.600e-05, forward_time=0.290, loss_ctc=53.105, loss_att=51.809, acc=0.730, loss=52.197, backward_time=0.401, grad_norm=38.962, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.478e-04, train_time=1.363 +[gpub010:0/16] 2024-01-30 04:09:21,713 (trainer:737) INFO: 17epoch:train:4401-4500batch: iter_time=8.855e-05, forward_time=0.317, loss_ctc=48.449, loss_att=46.780, acc=0.732, loss=47.281, backward_time=0.418, grad_norm=32.035, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.477e-04, train_time=1.642 +[gpub010:0/16] 2024-01-30 04:11:57,018 (trainer:737) INFO: 17epoch:train:4501-4600batch: iter_time=3.885e-04, forward_time=0.348, loss_ctc=51.421, loss_att=44.695, acc=0.767, loss=46.713, backward_time=0.435, grad_norm=30.809, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=2.477e-04, train_time=1.552 +[gpub010:0/16] 2024-01-30 04:14:26,964 (trainer:737) INFO: 17epoch:train:4601-4700batch: 
iter_time=8.019e-05, forward_time=0.294, loss_ctc=48.732, loss_att=52.391, acc=0.739, loss=51.293, backward_time=0.412, grad_norm=30.653, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.476e-04, train_time=1.499 +[gpub010:0/16] 2024-01-30 04:17:33,992 (trainer:737) INFO: 17epoch:train:4701-4800batch: iter_time=3.358e-04, forward_time=0.389, loss_ctc=44.670, loss_att=39.883, acc=0.764, loss=41.319, backward_time=0.418, grad_norm=29.438, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=2.476e-04, train_time=1.871 +[gpub010:0/16] 2024-01-30 04:19:53,340 (trainer:737) INFO: 17epoch:train:4801-4900batch: iter_time=9.112e-05, forward_time=0.292, loss_ctc=62.875, loss_att=65.411, acc=0.709, loss=64.650, backward_time=0.406, grad_norm=49.083, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.475e-04, train_time=1.393 +[gpub010:0/16] 2024-01-30 04:22:40,036 (trainer:737) INFO: 17epoch:train:4901-5000batch: iter_time=8.803e-05, forward_time=0.398, loss_ctc=51.763, loss_att=43.537, acc=0.769, loss=46.005, backward_time=0.424, grad_norm=30.649, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.126, optim0_lr0=2.475e-04, train_time=1.667 +[gpub010:0/16] 2024-01-30 04:23:00,170 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-01-30 04:23:18,993 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 04:23:22,556 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 04:23:22,556 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-01-30 04:23:22,559 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 04:33:29,753 (trainer:737) INFO: 17epoch:train:5001-5100batch: iter_time=3.062, forward_time=0.287, loss_ctc=46.950, loss_att=47.563, acc=0.727, loss=47.379, backward_time=0.399, grad_norm=33.112, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.474e-04, train_time=6.497 +[gpub010:0/16] 2024-01-30 04:36:06,799 (trainer:737) INFO: 17epoch:train:5101-5200batch: iter_time=7.984e-05, forward_time=0.352, loss_ctc=53.175, loss_att=49.437, acc=0.727, loss=50.558, backward_time=0.443, grad_norm=33.871, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=2.474e-04, train_time=1.570 +[gpub010:0/16] 2024-01-30 04:38:42,934 (trainer:737) INFO: 17epoch:train:5201-5300batch: iter_time=8.706e-05, forward_time=0.317, loss_ctc=49.950, loss_att=53.183, acc=0.744, loss=52.213, backward_time=0.415, grad_norm=30.667, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.473e-04, train_time=1.561 +[gpub010:0/16] 2024-01-30 04:41:30,577 (trainer:737) INFO: 
17epoch:train:5301-5400batch: iter_time=8.556e-05, forward_time=0.291, loss_ctc=53.494, loss_att=53.270, acc=0.737, loss=53.337, backward_time=0.403, grad_norm=32.581, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.473e-04, train_time=1.676 +[gpub010:0/16] 2024-01-30 04:43:21,516 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-01-30 04:43:57,171 (trainer:737) INFO: 17epoch:train:5401-5500batch: iter_time=8.569e-05, forward_time=0.355, loss_ctc=46.924, loss_att=43.719, acc=0.731, loss=44.681, backward_time=0.465, grad_norm=31.249, clip=100.000, loss_scale=1.469e+34, optim_step_time=0.103, optim0_lr0=2.472e-04, train_time=1.466 +[gpub010:0/16] 2024-01-30 04:46:36,110 (trainer:737) INFO: 17epoch:train:5501-5600batch: iter_time=8.246e-05, forward_time=0.291, loss_ctc=52.564, loss_att=49.067, acc=0.758, loss=50.116, backward_time=0.405, grad_norm=39.183, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.472e-04, train_time=1.589 +[gpub010:0/16] 2024-01-30 04:49:23,588 (trainer:737) INFO: 17epoch:train:5601-5700batch: iter_time=8.678e-05, forward_time=0.288, loss_ctc=52.258, loss_att=50.010, acc=0.736, loss=50.685, backward_time=0.400, grad_norm=31.866, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.471e-04, train_time=1.675 +[gpub010:0/16] 2024-01-30 04:52:00,361 (trainer:737) INFO: 17epoch:train:5701-5800batch: iter_time=9.119e-05, forward_time=0.382, loss_ctc=48.555, loss_att=41.684, acc=0.753, loss=43.746, backward_time=0.438, grad_norm=32.444, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.471e-04, train_time=1.567 +[gpub010:0/16] 2024-01-30 04:54:18,546 (trainer:737) INFO: 17epoch:train:5801-5900batch: iter_time=8.625e-05, forward_time=0.294, loss_ctc=51.256, loss_att=54.823, acc=0.748, loss=53.753, backward_time=0.408, grad_norm=30.044, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.470e-04, 
train_time=1.382 +[gpub010:0/16] 2024-01-30 04:57:21,374 (trainer:737) INFO: 17epoch:train:5901-6000batch: iter_time=8.748e-05, forward_time=0.368, loss_ctc=45.968, loss_att=42.315, acc=0.757, loss=43.411, backward_time=0.452, grad_norm=29.723, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.470e-04, train_time=1.828 +[gpub010:0/16] 2024-01-30 04:59:51,782 (trainer:737) INFO: 17epoch:train:6001-6100batch: iter_time=8.804e-05, forward_time=0.288, loss_ctc=48.693, loss_att=47.555, acc=0.741, loss=47.897, backward_time=0.402, grad_norm=31.975, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.469e-04, train_time=1.503 +[gpub010:0/16] 2024-01-30 05:02:06,153 (trainer:737) INFO: 17epoch:train:6101-6200batch: iter_time=9.449e-05, forward_time=0.293, loss_ctc=59.736, loss_att=57.162, acc=0.733, loss=57.935, backward_time=0.405, grad_norm=38.896, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.469e-04, train_time=1.344 +[gpub010:0/16] 2024-01-30 05:04:06,823 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub010:0/16] 2024-01-30 05:04:26,061 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 05:04:29,633 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 05:04:29,633 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-01-30 05:04:29,637 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 05:09:58,439 (trainer:737) INFO: 17epoch:train:6201-6300batch: iter_time=2.929, forward_time=0.385, loss_ctc=47.853, loss_att=45.289, acc=0.754, loss=46.058, backward_time=0.448, grad_norm=31.188, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.468e-04, train_time=4.722 +[gpub010:0/16] 2024-01-30 05:12:08,580 (trainer:737) INFO: 17epoch:train:6301-6400batch: iter_time=8.746e-05, forward_time=0.288, loss_ctc=50.162, loss_att=48.055, acc=0.719, loss=48.687, backward_time=0.400, grad_norm=33.785, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.468e-04, train_time=1.302 +[gpub010:0/16] 2024-01-30 05:15:11,929 (trainer:737) INFO: 17epoch:train:6401-6500batch: iter_time=9.630e-05, forward_time=0.351, loss_ctc=53.806, loss_att=59.169, acc=0.726, loss=57.560, backward_time=0.477, grad_norm=34.770, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.467e-04, train_time=1.833 +[gpub010:0/16] 2024-01-30 05:17:45,226 (trainer:737) INFO: 
17epoch:train:6501-6600batch: iter_time=8.734e-05, forward_time=0.312, loss_ctc=49.167, loss_att=46.959, acc=0.759, loss=47.621, backward_time=0.405, grad_norm=29.022, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.467e-04, train_time=1.532 +[gpub010:0/16] 2024-01-30 05:20:18,175 (trainer:737) INFO: 17epoch:train:6601-6700batch: iter_time=9.118e-05, forward_time=0.398, loss_ctc=49.409, loss_att=47.571, acc=0.726, loss=48.123, backward_time=0.432, grad_norm=33.404, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.111, optim0_lr0=2.466e-04, train_time=1.530 +[gpub010:0/16] 2024-01-30 05:22:59,359 (trainer:737) INFO: 17epoch:train:6701-6800batch: iter_time=1.020e-04, forward_time=0.289, loss_ctc=48.645, loss_att=42.090, acc=0.767, loss=44.056, backward_time=0.400, grad_norm=29.492, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.466e-04, train_time=1.612 +[gpub010:0/16] 2024-01-30 05:25:46,450 (trainer:737) INFO: 17epoch:train:6801-6900batch: iter_time=8.745e-05, forward_time=0.404, loss_ctc=52.126, loss_att=50.653, acc=0.733, loss=51.095, backward_time=0.426, grad_norm=41.329, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.136, optim0_lr0=2.465e-04, train_time=1.671 +[gpub010:0/16] 2024-01-30 05:28:01,714 (trainer:737) INFO: 17epoch:train:6901-7000batch: iter_time=9.352e-05, forward_time=0.301, loss_ctc=47.961, loss_att=46.526, acc=0.733, loss=46.957, backward_time=0.403, grad_norm=33.431, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.465e-04, train_time=1.353 +[gpub010:0/16] 2024-01-30 05:28:27,764 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-30 05:30:36,300 (trainer:737) INFO: 17epoch:train:7001-7100batch: iter_time=8.916e-05, forward_time=0.291, loss_ctc=50.969, loss_att=43.813, acc=0.771, loss=45.960, backward_time=0.403, grad_norm=31.476, clip=100.000, loss_scale=6.189e+33, optim_step_time=0.092, optim0_lr0=2.464e-04, train_time=1.546 +[gpub010:0/16] 2024-01-30 05:33:32,323 (trainer:737) INFO: 17epoch:train:7101-7200batch: iter_time=6.149e-04, forward_time=0.401, loss_ctc=48.434, loss_att=51.490, acc=0.741, loss=50.573, backward_time=0.424, grad_norm=30.598, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=2.464e-04, train_time=1.760 +[gpub010:0/16] 2024-01-30 05:35:47,909 (trainer:737) INFO: 17epoch:train:7201-7300batch: iter_time=8.637e-05, forward_time=0.290, loss_ctc=44.591, loss_att=39.465, acc=0.766, loss=41.002, backward_time=0.402, grad_norm=28.016, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.463e-04, train_time=1.356 +[gpub010:0/16] 2024-01-30 05:38:33,718 (trainer:737) INFO: 17epoch:train:7301-7400batch: iter_time=1.010e-04, forward_time=0.371, loss_ctc=60.988, loss_att=63.547, acc=0.712, loss=62.780, backward_time=0.487, grad_norm=42.464, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=2.463e-04, train_time=1.658 +[gpub010:0/16] 2024-01-30 05:41:20,016 (trainer:737) INFO: 17epoch:train:7401-7500batch: iter_time=8.519e-05, forward_time=0.290, loss_ctc=51.134, loss_att=42.928, acc=0.770, loss=45.390, backward_time=0.402, grad_norm=29.602, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.462e-04, train_time=1.663 +[gpub010:0/16] 2024-01-30 05:41:40,043 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub010:0/16] 2024-01-30 05:41:59,477 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 05:42:03,088 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 05:42:03,088 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-01-30 05:42:03,091 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 05:48:52,552 (trainer:737) INFO: 17epoch:train:7501-7600batch: iter_time=3.053, forward_time=0.340, loss_ctc=46.790, loss_att=51.009, acc=0.711, loss=49.744, backward_time=0.468, grad_norm=35.464, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.462e-04, train_time=4.525 +[gpub010:0/16] 2024-01-30 05:51:25,143 (trainer:737) INFO: 17epoch:train:7601-7700batch: iter_time=8.334e-05, forward_time=0.289, loss_ctc=52.616, loss_att=49.745, acc=0.723, loss=50.607, backward_time=0.402, grad_norm=35.005, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.461e-04, train_time=1.526 +[gpub010:0/16] 2024-01-30 05:54:08,700 (trainer:737) INFO: 17epoch:train:7701-7800batch: iter_time=8.235e-05, forward_time=0.314, loss_ctc=50.404, loss_att=54.392, acc=0.736, loss=53.196, backward_time=0.457, grad_norm=31.962, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.461e-04, train_time=1.635 +[gpub010:0/16] 2024-01-30 05:56:30,501 (trainer:737) INFO: 
17epoch:train:7801-7900batch: iter_time=8.625e-05, forward_time=0.341, loss_ctc=53.384, loss_att=53.487, acc=0.727, loss=53.456, backward_time=0.409, grad_norm=33.988, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.460e-04, train_time=1.418 +[gpub010:0/16] 2024-01-30 05:58:41,423 (trainer:737) INFO: 17epoch:train:7901-8000batch: iter_time=8.697e-05, forward_time=0.287, loss_ctc=46.767, loss_att=44.416, acc=0.718, loss=45.121, backward_time=0.399, grad_norm=32.300, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.460e-04, train_time=1.309 +[gpub010:0/16] 2024-01-30 06:01:41,229 (trainer:737) INFO: 17epoch:train:8001-8100batch: iter_time=2.087e-04, forward_time=0.353, loss_ctc=50.321, loss_att=46.873, acc=0.754, loss=47.907, backward_time=0.493, grad_norm=35.649, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.459e-04, train_time=1.797 +[gpub010:0/16] 2024-01-30 06:04:14,128 (trainer:737) INFO: 17epoch:train:8101-8200batch: iter_time=8.759e-05, forward_time=0.288, loss_ctc=51.721, loss_att=48.422, acc=0.739, loss=49.412, backward_time=0.400, grad_norm=32.019, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.459e-04, train_time=1.528 +[gpub010:0/16] 2024-01-30 06:06:44,438 (trainer:737) INFO: 17epoch:train:8201-8300batch: iter_time=8.452e-05, forward_time=0.411, loss_ctc=47.785, loss_att=42.311, acc=0.744, loss=43.953, backward_time=0.446, grad_norm=40.768, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.458e-04, train_time=1.504 +[gpub010:0/16] 2024-01-30 06:09:08,984 (trainer:737) INFO: 17epoch:train:8301-8400batch: iter_time=8.171e-05, forward_time=0.292, loss_ctc=51.161, loss_att=54.424, acc=0.733, loss=53.445, backward_time=0.404, grad_norm=31.302, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.458e-04, train_time=1.445 +[gpub010:0/16] 2024-01-30 06:12:00,218 (trainer:737) INFO: 17epoch:train:8401-8500batch: 
iter_time=8.532e-05, forward_time=0.305, loss_ctc=45.591, loss_att=42.267, acc=0.749, loss=43.264, backward_time=0.418, grad_norm=29.736, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=2.457e-04, train_time=1.713 +[gpub010:0/16] 2024-01-30 06:14:22,676 (trainer:737) INFO: 17epoch:train:8501-8600batch: iter_time=8.883e-05, forward_time=0.344, loss_ctc=48.648, loss_att=48.194, acc=0.733, loss=48.330, backward_time=0.448, grad_norm=32.024, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.457e-04, train_time=1.424 +[gpub010:0/16] 2024-01-30 06:16:52,885 (trainer:737) INFO: 17epoch:train:8601-8700batch: iter_time=9.290e-05, forward_time=0.292, loss_ctc=60.449, loss_att=57.372, acc=0.717, loss=58.295, backward_time=0.404, grad_norm=43.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.456e-04, train_time=1.501 +[gpub010:0/16] 2024-01-30 06:18:20,138 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub010:0/16] 2024-01-30 06:18:39,047 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 06:18:42,641 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 06:18:42,641 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-01-30 06:18:42,645 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 06:24:21,983 
(trainer:737) INFO: 17epoch:train:8701-8800batch: iter_time=3.063, forward_time=0.366, loss_ctc=48.018, loss_att=44.052, acc=0.759, loss=45.242, backward_time=0.438, grad_norm=32.494, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=2.456e-04, train_time=4.491 +[gpub010:0/16] 2024-01-30 06:26:31,693 (trainer:737) INFO: 17epoch:train:8801-8900batch: iter_time=8.119e-05, forward_time=0.288, loss_ctc=49.829, loss_att=48.423, acc=0.718, loss=48.845, backward_time=0.400, grad_norm=34.730, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.455e-04, train_time=1.297 +[gpub010:0/16] 2024-01-30 06:28:59,759 (trainer:737) INFO: 17epoch:train:8901-9000batch: iter_time=8.704e-05, forward_time=0.297, loss_ctc=53.263, loss_att=60.196, acc=0.724, loss=58.116, backward_time=0.411, grad_norm=34.747, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.455e-04, train_time=1.480 +[gpub010:0/16] 2024-01-30 06:31:59,089 (trainer:737) INFO: 17epoch:train:9001-9100batch: iter_time=8.563e-05, forward_time=0.397, loss_ctc=49.114, loss_att=47.138, acc=0.758, loss=47.731, backward_time=0.433, grad_norm=30.292, clip=100.000, loss_scale=9.346e+33, optim_step_time=0.099, optim0_lr0=2.454e-04, train_time=1.794 +[gpub010:0/16] 2024-01-30 06:34:09,424 (trainer:737) INFO: 17epoch:train:9101-9200batch: iter_time=8.526e-05, forward_time=0.291, loss_ctc=48.879, loss_att=47.437, acc=0.728, loss=47.870, backward_time=0.403, grad_norm=31.994, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.454e-04, train_time=1.303 +[gpub010:0/16] 2024-01-30 06:34:18,539 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-30 06:37:07,051 (trainer:737) INFO: 17epoch:train:9201-9300batch: iter_time=8.243e-05, forward_time=0.364, loss_ctc=49.035, loss_att=42.139, acc=0.766, loss=44.207, backward_time=0.450, grad_norm=33.210, clip=100.000, loss_scale=5.507e+33, optim_step_time=0.102, optim0_lr0=2.453e-04, train_time=1.776 +[gpub010:0/16] 2024-01-30 06:39:18,112 (trainer:737) INFO: 17epoch:train:9301-9400batch: iter_time=8.286e-05, forward_time=0.290, loss_ctc=51.809, loss_att=50.254, acc=0.737, loss=50.720, backward_time=0.404, grad_norm=42.075, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.453e-04, train_time=1.311 +[gpub010:0/16] 2024-01-30 06:41:56,762 (trainer:737) INFO: 17epoch:train:9401-9500batch: iter_time=8.214e-05, forward_time=0.346, loss_ctc=47.421, loss_att=45.906, acc=0.736, loss=46.360, backward_time=0.452, grad_norm=32.360, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=2.452e-04, train_time=1.586 +[gpub010:0/16] 2024-01-30 06:44:33,077 (trainer:737) INFO: 17epoch:train:9501-9600batch: iter_time=9.182e-05, forward_time=0.291, loss_ctc=50.588, loss_att=43.841, acc=0.770, loss=45.865, backward_time=0.402, grad_norm=30.378, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.452e-04, train_time=1.563 +[gpub010:0/16] 2024-01-30 06:46:46,763 (trainer:737) INFO: 17epoch:train:9601-9700batch: iter_time=8.082e-05, forward_time=0.293, loss_ctc=48.084, loss_att=51.301, acc=0.741, loss=50.336, backward_time=0.408, grad_norm=30.804, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.451e-04, train_time=1.337 +[gpub010:0/16] 2024-01-30 06:49:50,601 (trainer:737) INFO: 17epoch:train:9701-9800batch: iter_time=9.727e-05, forward_time=0.386, loss_ctc=44.279, loss_att=39.678, acc=0.766, loss=41.059, backward_time=0.438, grad_norm=29.651, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=2.451e-04, train_time=1.837 +[gpub010:0/16] 2024-01-30 06:52:05,828 
(trainer:737) INFO: 17epoch:train:9801-9900batch: iter_time=8.820e-05, forward_time=0.293, loss_ctc=60.891, loss_att=63.871, acc=0.713, loss=62.977, backward_time=0.406, grad_norm=46.643, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.450e-04, train_time=1.353 +[gpub010:0/16] 2024-01-30 06:54:34,231 (trainer:737) INFO: 17epoch:train:9901-10000batch: iter_time=8.748e-05, forward_time=0.375, loss_ctc=50.294, loss_att=43.255, acc=0.769, loss=45.367, backward_time=0.444, grad_norm=30.529, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.450e-04, train_time=1.483 +[gpub010:0/16] 2024-01-30 06:54:54,335 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub010:0/16] 2024-01-30 06:55:13,672 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 06:55:17,273 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 06:55:17,274 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-01-30 06:55:17,277 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 07:02:29,202 (trainer:737) INFO: 17epoch:train:10001-10100batch: iter_time=3.221, forward_time=0.297, loss_ctc=46.435, loss_att=47.307, acc=0.729, loss=47.046, backward_time=0.399, grad_norm=32.798, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.449e-04, 
train_time=4.750 +[gpub010:0/16] 2024-01-30 07:04:40,864 (trainer:737) INFO: 17epoch:train:10101-10200batch: iter_time=8.024e-05, forward_time=0.289, loss_ctc=51.972, loss_att=48.238, acc=0.732, loss=49.358, backward_time=0.401, grad_norm=34.389, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=2.449e-04, train_time=1.316 +[gpub010:0/16] 2024-01-30 07:07:37,067 (trainer:737) INFO: 17epoch:train:10201-10300batch: iter_time=8.770e-05, forward_time=0.424, loss_ctc=49.813, loss_att=52.767, acc=0.747, loss=51.881, backward_time=0.423, grad_norm=31.003, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=2.448e-04, train_time=1.762 +[gpub010:0/16] 2024-01-30 07:10:08,190 (trainer:737) INFO: 17epoch:train:10301-10400batch: iter_time=8.234e-05, forward_time=0.292, loss_ctc=52.702, loss_att=52.378, acc=0.741, loss=52.475, backward_time=0.403, grad_norm=31.518, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.448e-04, train_time=1.511 +[gpub010:0/16] 2024-01-30 07:12:35,528 (trainer:737) INFO: 17epoch:train:10401-10500batch: iter_time=8.534e-05, forward_time=0.356, loss_ctc=46.331, loss_att=43.224, acc=0.731, loss=44.156, backward_time=0.443, grad_norm=31.294, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=2.447e-04, train_time=1.473 +[gpub010:0/16] 2024-01-30 07:14:55,622 (trainer:737) INFO: 17epoch:train:10501-10600batch: iter_time=8.167e-05, forward_time=0.294, loss_ctc=49.825, loss_att=48.913, acc=0.755, loss=49.187, backward_time=0.401, grad_norm=35.973, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.447e-04, train_time=1.400 +[gpub010:0/16] 2024-01-30 07:17:40,595 (trainer:737) INFO: 17epoch:train:10601-10700batch: iter_time=8.352e-05, forward_time=0.289, loss_ctc=51.509, loss_att=49.043, acc=0.741, loss=49.783, backward_time=0.399, grad_norm=30.707, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.446e-04, train_time=1.650 
+[gpub010:0/16] 2024-01-30 07:20:11,022 (trainer:737) INFO: 17epoch:train:10701-10800batch: iter_time=8.665e-05, forward_time=0.356, loss_ctc=47.140, loss_att=40.998, acc=0.758, loss=42.841, backward_time=0.481, grad_norm=32.030, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=2.446e-04, train_time=1.504 +[gpub010:0/16] 2024-01-30 07:22:32,188 (trainer:737) INFO: 17epoch:train:10801-10900batch: iter_time=8.657e-05, forward_time=0.292, loss_ctc=50.975, loss_att=53.840, acc=0.749, loss=52.980, backward_time=0.406, grad_norm=30.462, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.445e-04, train_time=1.410 +[gpub010:0/16] 2024-01-30 07:25:35,301 (trainer:737) INFO: 17epoch:train:10901-11000batch: iter_time=8.804e-05, forward_time=0.363, loss_ctc=45.244, loss_att=41.125, acc=0.762, loss=42.361, backward_time=0.463, grad_norm=29.660, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.445e-04, train_time=1.832 +[gpub010:0/16] 2024-01-30 07:27:47,908 (trainer:737) INFO: 17epoch:train:11001-11100batch: iter_time=8.480e-05, forward_time=0.305, loss_ctc=48.011, loss_att=47.533, acc=0.742, loss=47.677, backward_time=0.404, grad_norm=31.720, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.444e-04, train_time=1.326 +[gpub010:0/16] 2024-01-30 07:30:06,302 (trainer:737) INFO: 17epoch:train:11101-11200batch: iter_time=8.486e-05, forward_time=0.293, loss_ctc=58.793, loss_att=56.657, acc=0.735, loss=57.298, backward_time=0.406, grad_norm=38.237, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.444e-04, train_time=1.382 +[gpub010:0/16] 2024-01-30 07:31:56,155 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-01-30 07:32:15,700 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 07:32:19,283 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 07:32:19,284 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-01-30 07:32:19,287 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 07:38:09,003 (trainer:737) INFO: 17epoch:train:11201-11300batch: iter_time=3.117, forward_time=0.387, loss_ctc=47.369, loss_att=45.117, acc=0.755, loss=45.793, backward_time=0.434, grad_norm=31.420, clip=100.000, loss_scale=1.002e+34, optim_step_time=0.097, optim0_lr0=2.443e-04, train_time=4.828 +[gpub010:0/16] 2024-01-30 07:40:18,328 (trainer:737) INFO: 17epoch:train:11301-11400batch: iter_time=8.134e-05, forward_time=0.286, loss_ctc=49.583, loss_att=49.735, acc=0.707, loss=49.690, backward_time=0.398, grad_norm=37.865, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.443e-04, train_time=1.293 +[gpub010:0/16] 2024-01-30 07:43:09,730 (trainer:737) INFO: 17epoch:train:11401-11500batch: iter_time=8.101e-05, forward_time=0.382, loss_ctc=53.368, loss_att=59.295, acc=0.722, loss=57.517, backward_time=0.480, grad_norm=36.883, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.443e-04, train_time=1.714 +[gpub010:0/16] 2024-01-30 07:46:03,777 (trainer:737) 
INFO: 17epoch:train:11501-11600batch: iter_time=8.121e-05, forward_time=0.404, loss_ctc=48.969, loss_att=48.523, acc=0.749, loss=48.657, backward_time=0.438, grad_norm=29.895, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.141, optim0_lr0=2.442e-04, train_time=1.740 +[gpub010:0/16] 2024-01-30 07:48:24,138 (trainer:737) INFO: 17epoch:train:11601-11700batch: iter_time=8.805e-05, forward_time=0.288, loss_ctc=48.679, loss_att=47.302, acc=0.720, loss=47.715, backward_time=0.400, grad_norm=31.873, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.442e-04, train_time=1.404 +[gpub010:0/16] 2024-01-30 07:51:11,307 (trainer:737) INFO: 17epoch:train:11701-11800batch: iter_time=8.434e-05, forward_time=0.348, loss_ctc=48.472, loss_att=42.777, acc=0.755, loss=44.485, backward_time=0.452, grad_norm=30.628, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=2.441e-04, train_time=1.672 +[gpub010:0/16] 2024-01-30 07:53:33,755 (trainer:737) INFO: 17epoch:train:11801-11900batch: iter_time=8.265e-05, forward_time=0.290, loss_ctc=51.464, loss_att=47.477, acc=0.741, loss=48.673, backward_time=0.401, grad_norm=37.359, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.441e-04, train_time=1.424 +[gpub010:0/16] 2024-01-30 07:55:59,250 (trainer:737) INFO: 17epoch:train:11901-12000batch: iter_time=8.259e-05, forward_time=0.288, loss_ctc=47.189, loss_att=47.508, acc=0.725, loss=47.412, backward_time=0.404, grad_norm=33.256, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=2.440e-04, train_time=1.455 +[gpub010:0/16] 2024-01-30 07:58:53,706 (trainer:737) INFO: 17epoch:train:12001-12100batch: iter_time=8.210e-05, forward_time=0.402, loss_ctc=50.390, loss_att=44.063, acc=0.761, loss=45.961, backward_time=0.419, grad_norm=30.309, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.440e-04, train_time=1.744 +[gpub010:0/16] 2024-01-30 08:01:20,086 (trainer:737) INFO: 
17epoch:train:12101-12200batch: iter_time=8.386e-05, forward_time=0.290, loss_ctc=47.725, loss_att=51.234, acc=0.732, loss=50.181, backward_time=0.403, grad_norm=30.976, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.439e-04, train_time=1.464 +[gpub010:0/16] 2024-01-30 08:03:58,320 (trainer:737) INFO: 17epoch:train:12201-12300batch: iter_time=3.601e-04, forward_time=0.360, loss_ctc=44.387, loss_att=39.958, acc=0.757, loss=41.287, backward_time=0.477, grad_norm=29.197, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=2.439e-04, train_time=1.582 +[gpub010:0/16] 2024-01-30 08:06:43,312 (trainer:737) INFO: 17epoch:train:12301-12400batch: iter_time=8.791e-05, forward_time=0.291, loss_ctc=61.350, loss_att=63.557, acc=0.696, loss=62.895, backward_time=0.403, grad_norm=48.063, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.438e-04, train_time=1.650 +[gpub010:0/16] 2024-01-30 08:09:17,575 (trainer:737) INFO: 17epoch:train:12401-12500batch: iter_time=3.419e-04, forward_time=0.403, loss_ctc=50.386, loss_att=42.000, acc=0.766, loss=44.516, backward_time=0.430, grad_norm=30.980, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=2.438e-04, train_time=1.542 +[gpub010:0/16] 2024-01-30 08:09:37,701 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-01-30 08:09:57,075 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 08:10:00,669 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 08:10:00,670 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-01-30 08:10:00,673 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 08:17:05,791 (trainer:737) INFO: 17epoch:train:12501-12600batch: iter_time=3.232, forward_time=0.289, loss_ctc=45.755, loss_att=46.983, acc=0.719, loss=46.615, backward_time=0.399, grad_norm=32.561, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.437e-04, train_time=4.683 +[gpub010:0/16] 2024-01-30 08:19:44,948 (trainer:737) INFO: 17epoch:train:12601-12700batch: iter_time=8.354e-05, forward_time=0.394, loss_ctc=52.148, loss_att=48.528, acc=0.727, loss=49.614, backward_time=0.423, grad_norm=35.016, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=2.437e-04, train_time=1.591 +[gpub010:0/16] 2024-01-30 08:22:42,708 (trainer:737) INFO: 17epoch:train:12701-12800batch: iter_time=8.293e-05, forward_time=0.414, loss_ctc=49.339, loss_att=52.905, acc=0.740, loss=51.835, backward_time=0.426, grad_norm=31.442, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.436e-04, train_time=1.778 +[gpub010:0/16] 2024-01-30 08:25:12,482 (trainer:737) 
INFO: 17epoch:train:12801-12900batch: iter_time=9.131e-05, forward_time=0.291, loss_ctc=52.191, loss_att=52.214, acc=0.730, loss=52.207, backward_time=0.402, grad_norm=33.916, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.436e-04, train_time=1.497 +[gpub010:0/16] 2024-01-30 08:27:39,470 (trainer:737) INFO: 17epoch:train:12901-13000batch: iter_time=2.871e-04, forward_time=0.363, loss_ctc=45.823, loss_att=43.426, acc=0.721, loss=44.145, backward_time=0.458, grad_norm=32.330, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=2.435e-04, train_time=1.470 +[gpub010:0/16] 2024-01-30 08:30:34,264 (trainer:737) INFO: 17epoch:train:13001-13100batch: iter_time=8.595e-05, forward_time=0.289, loss_ctc=50.063, loss_att=47.131, acc=0.756, loss=48.011, backward_time=0.400, grad_norm=36.933, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.435e-04, train_time=1.747 +[gpub010:0/16] 2024-01-30 08:33:00,883 (trainer:737) INFO: 17epoch:train:13101-13200batch: iter_time=8.945e-05, forward_time=0.350, loss_ctc=51.462, loss_att=47.897, acc=0.740, loss=48.966, backward_time=0.497, grad_norm=33.054, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=2.434e-04, train_time=1.467 +[gpub010:0/16] 2024-01-30 08:33:19,098 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-30 08:35:26,216 (trainer:737) INFO: 17epoch:train:13201-13300batch: iter_time=8.766e-05, forward_time=0.377, loss_ctc=47.396, loss_att=41.684, acc=0.746, loss=43.397, backward_time=0.460, grad_norm=33.813, clip=100.000, loss_scale=1.101e+34, optim_step_time=0.100, optim0_lr0=2.434e-04, train_time=1.452 +[gpub010:0/16] 2024-01-30 08:38:19,660 (trainer:737) INFO: 17epoch:train:13301-13400batch: iter_time=9.236e-05, forward_time=0.291, loss_ctc=50.805, loss_att=53.764, acc=0.736, loss=52.877, backward_time=0.404, grad_norm=30.888, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.433e-04, train_time=1.735 +[gpub010:0/16] 2024-01-30 08:40:47,866 (trainer:737) INFO: 17epoch:train:13401-13500batch: iter_time=9.404e-05, forward_time=0.414, loss_ctc=45.024, loss_att=41.431, acc=0.753, loss=42.509, backward_time=0.446, grad_norm=29.553, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=2.433e-04, train_time=1.481 +[gpub010:0/16] 2024-01-30 08:43:14,291 (trainer:737) INFO: 17epoch:train:13501-13600batch: iter_time=5.582e-04, forward_time=0.394, loss_ctc=47.793, loss_att=47.163, acc=0.736, loss=47.352, backward_time=0.453, grad_norm=31.529, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=2.432e-04, train_time=1.465 +[gpub010:0/16] 2024-01-30 08:46:05,577 (trainer:737) INFO: 17epoch:train:13601-13700batch: iter_time=9.214e-05, forward_time=0.298, loss_ctc=58.587, loss_att=56.905, acc=0.719, loss=57.410, backward_time=0.405, grad_norm=43.462, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=2.432e-04, train_time=1.711 +[gpub010:0/16] 2024-01-30 08:47:35,641 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-01-30 08:47:55,191 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 08:47:58,809 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 08:47:58,809 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-01-30 08:47:58,812 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-01-30 08:53:42,878 (trainer:737) INFO: 17epoch:train:13701-13800batch: iter_time=3.086, forward_time=0.385, loss_ctc=47.259, loss_att=43.060, acc=0.757, loss=44.320, backward_time=0.418, grad_norm=33.787, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=2.431e-04, train_time=4.574 +[gpub010:0/16] 2024-01-30 08:54:11,379 (trainer:668) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-01-30 08:56:08,900 (trainer:737) INFO: 17epoch:train:13801-13900batch: iter_time=8.068e-05, forward_time=0.289, loss_ctc=49.681, loss_att=48.018, acc=0.711, loss=48.517, backward_time=0.398, grad_norm=34.009, clip=100.000, loss_scale=6.294e+33, optim_step_time=0.092, optim0_lr0=2.431e-04, train_time=1.459 +[gpub010:0/16] 2024-01-30 08:58:48,261 (trainer:737) INFO: 17epoch:train:13901-14000batch: iter_time=8.264e-05, forward_time=0.342, loss_ctc=52.943, loss_att=57.802, acc=0.724, loss=56.345, backward_time=0.473, grad_norm=36.478, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=2.430e-04, train_time=1.594 +[gpub010:0/16] 2024-01-30 09:01:29,481 (trainer:737) INFO: 17epoch:train:14001-14100batch: iter_time=8.159e-05, forward_time=0.298, loss_ctc=48.296, loss_att=46.931, acc=0.754, loss=47.340, backward_time=0.404, grad_norm=30.286, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.430e-04, train_time=1.612 +[gpub010:0/16] 2024-01-30 09:03:43,679 (trainer:737) INFO: 17epoch:train:14101-14200batch: iter_time=8.440e-05, forward_time=0.288, loss_ctc=47.995, loss_att=46.646, acc=0.722, loss=47.051, backward_time=0.404, grad_norm=31.843, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.430e-04, train_time=1.341 +[gpub010:0/16] 2024-01-30 09:06:33,908 (trainer:737) INFO: 17epoch:train:14201-14300batch: iter_time=5.380e-04, forward_time=0.359, loss_ctc=48.540, loss_att=42.555, acc=0.756, loss=44.350, backward_time=0.460, grad_norm=31.517, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.429e-04, train_time=1.703 +[gpub010:0/16] 2024-01-30 09:09:05,134 (trainer:737) INFO: 17epoch:train:14301-14400batch: iter_time=8.407e-05, forward_time=0.295, loss_ctc=51.395, loss_att=46.818, acc=0.741, loss=48.191, backward_time=0.401, grad_norm=40.156, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.429e-04, train_time=1.512 +[gpub010:0/16] 2024-01-30 
09:11:46,727 (trainer:737) INFO: 17epoch:train:14401-14500batch: iter_time=2.201e-04, forward_time=0.375, loss_ctc=47.642, loss_att=47.018, acc=0.726, loss=47.206, backward_time=0.429, grad_norm=33.009, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.428e-04, train_time=1.616 +[gpub010:0/16] 2024-01-30 09:14:14,173 (trainer:737) INFO: 17epoch:train:14501-14600batch: iter_time=8.159e-05, forward_time=0.308, loss_ctc=50.106, loss_att=43.909, acc=0.761, loss=45.768, backward_time=0.403, grad_norm=29.904, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=2.428e-04, train_time=1.475 +[gpub010:0/16] 2024-01-30 09:16:57,484 (trainer:737) INFO: 17epoch:train:14601-14700batch: iter_time=8.467e-05, forward_time=0.291, loss_ctc=47.165, loss_att=50.381, acc=0.733, loss=49.416, backward_time=0.428, grad_norm=31.606, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.427e-04, train_time=1.633 +[gpub010:0/16] 2024-01-30 09:19:29,908 (trainer:737) INFO: 17epoch:train:14701-14800batch: iter_time=8.566e-05, forward_time=0.387, loss_ctc=43.923, loss_att=39.562, acc=0.759, loss=40.870, backward_time=0.422, grad_norm=28.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=2.427e-04, train_time=1.524 +[gpub010:0/16] 2024-01-30 09:22:11,490 (trainer:737) INFO: 17epoch:train:14801-14900batch: iter_time=8.169e-05, forward_time=0.292, loss_ctc=60.219, loss_att=63.252, acc=0.701, loss=62.342, backward_time=0.402, grad_norm=44.449, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=2.426e-04, train_time=1.616 +[gpub010:0/16] 2024-01-30 09:25:09,514 (trainer:737) INFO: 17epoch:train:14901-15000batch: iter_time=2.362e-04, forward_time=0.391, loss_ctc=50.278, loss_att=41.496, acc=0.767, loss=44.130, backward_time=0.437, grad_norm=31.385, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=2.426e-04, train_time=1.780 +[gpub010:0/16] 2024-01-30 10:02:59,919 (trainer:343) 
INFO: 17epoch results: [train] iter_time=0.245, forward_time=0.328, loss_ctc=50.768, loss_att=48.847, acc=0.738, loss=49.423, backward_time=0.422, grad_norm=33.867, clip=100.000, loss_scale=7.188e+33, optim_step_time=0.097, optim0_lr0=2.462e-04, train_time=1.820, time=7 hours, 35 minutes and 25.16 seconds, total_count=285000, gpu_max_cached_mem_GB=42.072, [valid] loss_ctc=42.916, cer_ctc=0.222, loss_att=45.533, acc=0.641, cer=0.311, wer=1.000, loss=44.748, time=37 minutes and 26.56 seconds, total_count=88749, gpu_max_cached_mem_GB=42.072 +[gpub010:0/16] 2024-01-30 10:03:10,583 (trainer:391) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-01-30 10:03:10,588 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/12epoch.pth +[gpub010:0/16] 2024-01-30 10:03:10,588 (trainer:272) INFO: 18/45epoch started. Estimated time to finish: 1 week, 3 days and 31 minutes +[gpub010:0/16] 2024-01-30 10:03:10,597 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-01-30 10:03:29,028 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-01-30 10:03:32,497 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-01-30 10:03:32,497 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-01-30 10:03:32,500 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. 
+slurmstepd: error: *** STEP 2904202.0 ON gpub010 CANCELLED AT 2024-01-30T10:04:03 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.13.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.13.log new file mode 100644 index 0000000000000000000000000000000000000000..f2d76c33bbeaa911dabde5b263be3f23bb869d12 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.13.log @@ -0,0 +1,2549 @@ +# Running on gpub078.delta.ncsa.illinois.edu +# Started at Wed Jan 24 17:44:19 CST 2024 +# SLURMD_NODENAME=gpub078 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2891733 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1706312636 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2891733 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[078,083-084,086]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1706139836 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[078,083-084,086]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=57932 +# SLURM_TOPOLOGY_ADDR=ss00.ss12.gpub078 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# 
SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_901f8055-bf9e-4e3f-b1e1-05051b41b9c5 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config 
conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_901f8055-bf9e-4e3f-b1e1-05051b41b9c5 +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_901f8055-bf9e-4e3f-b1e1-05051b41b9c5 +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type 
dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessi_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_901f8055-bf9e-4e3f-b1e1-05051b41b9c5 +ng_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_901f8055-bf9e-4e3f-b1e1-05051b41b9c5 +[gpub078:0/16] 2024-01-24 17:48:13,475 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub078:0/16] 2024-01-24 17:48:14,485 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. 
+[gpub078:0/16] 2024-01-24 17:48:14,572 (s2t:464) INFO: Vocabulary size: 50002 +[gpub078:0/16] 2024-01-24 17:48:28,276 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub078:0/16] 2024-01-24 17:48:28,283 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), 
eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, 
elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + 
(conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): 
Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, 
inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + 
(norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, 
out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): 
PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + 
(linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub078:0/16] 2024-01-24 17:48:28,283 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub078:0/16] 2024-01-24 17:48:28,283 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub078:0/16] 2024-01-24 17:48:28,313 (abs_task:1245) INFO: Saving the configuration in 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub078:0/16] 2024-01-24 17:48:33,708 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 17:48:34,626 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 17:48:34,626 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub078:0/16] 2024-01-24 17:48:34,627 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-24 17:48:52,512 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub078:57986:57986 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:57986:57986 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:57986:57986 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub078:0/16] 2024-01-24 17:48:58,623 (trainer:284) INFO: 8/45epoch started +[gpub078:0/16] 2024-01-24 17:48:58,667 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub078:0/16] 2024-01-24 17:49:16,343 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 17:49:19,722 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 17:49:19,722 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub078:0/16] 2024-01-24 17:49:19,725 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub083:56788:56788 [1] NCCL INFO cudaDriverVersion 12020 +gpub083:56788:56788 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:56788:56788 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:56788:56840 [1] NCCL INFO NET/IB : No device found. 
+gpub083:56788:56840 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:56788:56840 [1] NCCL INFO Using network Socket +gpub083:56788:56840 [1] NCCL INFO Setting affinity for GPU 1 to ffffffff,ffffffff +gpub083:56788:56840 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub083:56788:56840 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub083:56788:56840 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub083:56788:56840 [1] NCCL INFO Connected all rings +gpub083:56788:56840 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/Socket/1 +gpub083:56788:56840 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/Socket/1 +gpub083:56788:56840 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub083:56788:56840 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub083:56788:56840 [1] NCCL INFO Connected all trees +gpub083:56788:56840 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub083:56788:56840 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:56788:56840 [1] NCCL INFO comm 0x11e29d60 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub083:56787:56787 [0] NCCL INFO cudaDriverVersion 12020 +gpub083:56787:56787 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:56787:56787 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:56787:56841 [0] NCCL INFO NET/IB : No device found. 
+gpub083:56787:56841 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:56787:56841 [0] NCCL INFO Using network Socket +gpub083:56787:56841 [0] NCCL INFO Setting affinity for GPU 0 to ffffffff,ffffffff +gpub083:56787:56841 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub083:56787:56841 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub083:56787:56841 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub083:56787:56841 [0] NCCL INFO Connected all rings +gpub083:56787:56841 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/Socket/1 +gpub083:56787:56841 [0] NCCL INFO Connected all trees +gpub083:56787:56841 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub083:56787:56841 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:56787:56841 [0] NCCL INFO comm 0x19d03810 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub084:57372:57372 [2] NCCL INFO cudaDriverVersion 12020 +gpub084:57372:57372 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:57372:57372 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:57372:57431 [2] NCCL INFO NET/IB : No device found. 
+gpub084:57372:57431 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.184<0> [1]hsn0:141.142.145.184<0> [2]eth0:fe80::4845:b018:cee:4be4%eth0<0> +gpub084:57372:57431 [2] NCCL INFO Using network Socket +gpub084:57372:57431 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub084:57372:57431 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub084:57372:57431 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub084:57372:57431 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub084:57372:57431 [2] NCCL INFO Connected all rings +gpub084:57372:57431 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub084:57372:57431 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub084:57372:57431 [2] NCCL INFO Connected all trees +gpub084:57372:57431 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub084:57372:57431 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:57372:57431 [2] NCCL INFO comm 0x1d369d00 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub083:56789:56789 [2] NCCL INFO cudaDriverVersion 12020 +gpub083:56789:56789 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:56789:56789 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:56789:56839 [2] NCCL INFO NET/IB : No device found. 
+gpub083:56789:56839 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:56789:56839 [2] NCCL INFO Using network Socket +gpub083:56789:56839 [2] NCCL INFO Setting affinity for GPU 2 to ffffffff,ffffffff +gpub083:56789:56839 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub083:56789:56839 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub083:56789:56839 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub083:56789:56839 [2] NCCL INFO Connected all rings +gpub083:56789:56839 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub083:56789:56839 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub083:56789:56839 [2] NCCL INFO Connected all trees +gpub083:56789:56839 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub083:56789:56839 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:56789:56839 [2] NCCL INFO comm 0x1448f650 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub083:56790:56790 [3] NCCL INFO cudaDriverVersion 12020 +gpub083:56790:56790 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:56790:56790 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:56790:56842 [3] NCCL INFO NET/IB : No device found. 
+gpub083:56790:56842 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:56790:56842 [3] NCCL INFO Using network Socket +gpub083:56790:56842 [3] NCCL INFO Setting affinity for GPU 3 to ffffffff,ffffffff +gpub083:56790:56842 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub083:56790:56842 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub083:56790:56842 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub083:56790:56842 [3] NCCL INFO Connected all rings +gpub083:56790:56842 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub083:56790:56842 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub083:56790:56842 [3] NCCL INFO Connected all trees +gpub083:56790:56842 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub083:56790:56842 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:56790:56842 [3] NCCL INFO comm 0x163af330 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:57988:57988 [2] NCCL INFO cudaDriverVersion 12020 +gpub078:57988:57988 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:57988:57988 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:57988:58046 [2] NCCL INFO NET/IB : No device found. 
+gpub078:57988:58046 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.178<0> [1]hsn0:141.142.145.178<0> [2]eth0:fe80::3d2a:d8d2:47cc:f2ae%eth0<0> +gpub078:57988:58046 [2] NCCL INFO Using network Socket +gpub078:57988:58046 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:57988:58046 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub078:57988:58046 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub078:57988:58046 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub078:57988:58046 [2] NCCL INFO Connected all rings +gpub078:57988:58046 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub078:57988:58046 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub078:57988:58046 [2] NCCL INFO Connected all trees +gpub078:57988:58046 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub078:57988:58046 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:57988:58046 [2] NCCL INFO comm 0x1b77cc60 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:57989:57989 [3] NCCL INFO cudaDriverVersion 12020 +gpub078:57989:57989 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:57989:57989 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:57989:58045 [3] NCCL INFO NET/IB : No device found. 
+gpub078:57989:58045 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.178<0> [1]hsn0:141.142.145.178<0> [2]eth0:fe80::3d2a:d8d2:47cc:f2ae%eth0<0> +gpub078:57989:58045 [3] NCCL INFO Using network Socket +gpub078:57989:58045 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:57989:58045 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub078:57989:58045 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub078:57989:58045 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub078:57989:58045 [3] NCCL INFO Connected all rings +gpub078:57989:58045 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub078:57989:58045 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub078:57989:58045 [3] NCCL INFO Connected all trees +gpub078:57989:58045 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub078:57989:58045 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:57989:58045 [3] NCCL INFO comm 0x14e17e60 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub084:57373:57373 [3] NCCL INFO cudaDriverVersion 12020 +gpub084:57373:57373 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:57373:57373 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:57373:57434 [3] NCCL INFO NET/IB : No device found. 
+gpub084:57373:57434 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.184<0> [1]hsn0:141.142.145.184<0> [2]eth0:fe80::4845:b018:cee:4be4%eth0<0> +gpub084:57373:57434 [3] NCCL INFO Using network Socket +gpub084:57373:57434 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub084:57373:57434 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub084:57373:57434 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub084:57373:57434 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub084:57373:57434 [3] NCCL INFO Connected all rings +gpub084:57373:57434 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub084:57373:57434 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub084:57373:57434 [3] NCCL INFO Connected all trees +gpub084:57373:57434 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub084:57373:57434 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:57373:57434 [3] NCCL INFO comm 0x18903bb0 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub084:57370:57370 [0] NCCL INFO cudaDriverVersion 12020 +gpub084:57370:57370 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:57370:57370 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:57370:57433 [0] NCCL INFO NET/IB : No device found. 
+gpub084:57370:57433 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.184<0> [1]hsn0:141.142.145.184<0> [2]eth0:fe80::4845:b018:cee:4be4%eth0<0> +gpub084:57370:57433 [0] NCCL INFO Using network Socket +gpub084:57370:57433 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub084:57370:57433 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub084:57370:57433 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub084:57370:57433 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub084:57370:57433 [0] NCCL INFO Connected all rings +gpub084:57370:57433 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/Socket/1 +gpub084:57370:57433 [0] NCCL INFO Connected all trees +gpub084:57370:57433 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub084:57370:57433 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:57370:57433 [0] NCCL INFO comm 0x15f25ec0 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:57986:58043 [0] NCCL INFO NET/IB : No device found. 
+gpub078:57986:58043 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.178<0> [1]hsn0:141.142.145.178<0> [2]eth0:fe80::3d2a:d8d2:47cc:f2ae%eth0<0> +gpub078:57986:58043 [0] NCCL INFO Using network Socket +gpub078:57986:58043 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:57986:58043 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub078:57986:58043 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub078:57986:58043 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub078:57986:58043 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub078:57986:58043 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub078:57986:58043 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub078:57986:58043 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub078:57986:58043 [0] NCCL INFO Connected all rings +gpub078:57986:58043 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/Socket/1 +gpub078:57986:58043 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub078:57986:58043 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/Socket/1 +gpub078:57986:58043 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub078:57986:58043 [0] NCCL INFO Connected all trees +gpub078:57986:58043 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub078:57986:58043 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:57986:58043 [0] NCCL INFO comm 0x1751a9a0 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub086:138843:138843 [2] NCCL INFO cudaDriverVersion 12020 +gpub086:138843:138843 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.186<0> +gpub086:138843:138843 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub086:138843:138896 [2] NCCL INFO NET/IB : No device found. 
+gpub086:138843:138896 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.186<0> [1]hsn0:141.142.145.186<0> [2]eth0:fe80::6790:58e4:2640:df66%eth0<0> +gpub086:138843:138896 [2] NCCL INFO Using network Socket +gpub086:138843:138896 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub086:138843:138896 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub086:138843:138896 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub086:138843:138896 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub086:138843:138896 [2] NCCL INFO Connected all rings +gpub086:138843:138896 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub086:138843:138896 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub086:138843:138896 [2] NCCL INFO Connected all trees +gpub086:138843:138896 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub086:138843:138896 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub086:138843:138896 [2] NCCL INFO comm 0xf2b9ab0 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub086:138841:138841 [0] NCCL INFO cudaDriverVersion 12020 +gpub086:138841:138841 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.186<0> +gpub086:138841:138841 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub086:138841:138894 [0] NCCL INFO NET/IB : No device found. 
+gpub086:138841:138894 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.186<0> [1]hsn0:141.142.145.186<0> [2]eth0:fe80::6790:58e4:2640:df66%eth0<0> +gpub086:138841:138894 [0] NCCL INFO Using network Socket +gpub086:138841:138894 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub086:138841:138894 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub086:138841:138894 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub086:138841:138894 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub086:138841:138894 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub086:138841:138894 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub086:138841:138894 [0] NCCL INFO Connected all rings +gpub086:138841:138894 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub086:138841:138894 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub086:138841:138894 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/Socket/1 +gpub086:138841:138894 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/Socket/1 +gpub086:138841:138894 [0] NCCL INFO Connected all trees +gpub086:138841:138894 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub086:138841:138894 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub086:138841:138894 [0] NCCL INFO comm 0xc14e4f0 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub086:138844:138844 [3] NCCL INFO cudaDriverVersion 12020 +gpub086:138844:138844 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.186<0> +gpub086:138844:138844 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub086:138844:138897 [3] NCCL INFO NET/IB : No device found. 
+gpub086:138844:138897 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.186<0> [1]hsn0:141.142.145.186<0> [2]eth0:fe80::6790:58e4:2640:df66%eth0<0> +gpub086:138844:138897 [3] NCCL INFO Using network Socket +gpub086:138844:138897 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub086:138844:138897 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub086:138844:138897 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub086:138844:138897 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub086:138844:138897 [3] NCCL INFO Connected all rings +gpub086:138844:138897 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub086:138844:138897 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub086:138844:138897 [3] NCCL INFO Connected all trees +gpub086:138844:138897 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub086:138844:138897 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub086:138844:138897 [3] NCCL INFO comm 0x1ae3bae0 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:57987:57987 [1] NCCL INFO cudaDriverVersion 12020 +gpub078:57987:57987 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:57987:57987 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:57987:58044 [1] NCCL INFO NET/IB : No device found. 
+gpub078:57987:58044 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.178<0> [1]hsn0:141.142.145.178<0> [2]eth0:fe80::3d2a:d8d2:47cc:f2ae%eth0<0> +gpub078:57987:58044 [1] NCCL INFO Using network Socket +gpub078:57987:58044 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:57987:58044 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub078:57987:58044 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub078:57987:58044 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub078:57987:58044 [1] NCCL INFO Connected all rings +gpub078:57987:58044 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub078:57987:58044 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub078:57987:58044 [1] NCCL INFO Connected all trees +gpub078:57987:58044 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub078:57987:58044 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:57987:58044 [1] NCCL INFO comm 0x1675cb20 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub086:138842:138842 [1] NCCL INFO cudaDriverVersion 12020 +gpub086:138842:138842 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.186<0> +gpub086:138842:138842 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub086:138842:138895 [1] NCCL INFO NET/IB : No device found. 
+gpub086:138842:138895 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.186<0> [1]hsn0:141.142.145.186<0> [2]eth0:fe80::6790:58e4:2640:df66%eth0<0> +gpub086:138842:138895 [1] NCCL INFO Using network Socket +gpub086:138842:138895 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub086:138842:138895 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub086:138842:138895 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub086:138842:138895 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub086:138842:138895 [1] NCCL INFO Connected all rings +gpub086:138842:138895 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub086:138842:138895 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub086:138842:138895 [1] NCCL INFO Connected all trees +gpub086:138842:138895 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub086:138842:138895 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub086:138842:138895 [1] NCCL INFO comm 0x1cffd390 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub084:57371:57371 [1] NCCL INFO cudaDriverVersion 12020 +gpub084:57371:57371 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:57371:57371 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:57371:57432 [1] NCCL INFO NET/IB : No device found. 
+gpub084:57371:57432 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.184<0> [1]hsn0:141.142.145.184<0> [2]eth0:fe80::4845:b018:cee:4be4%eth0<0> +gpub084:57371:57432 [1] NCCL INFO Using network Socket +gpub084:57371:57432 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub084:57371:57432 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub084:57371:57432 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub084:57371:57432 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub084:57371:57432 [1] NCCL INFO Connected all rings +gpub084:57371:57432 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/Socket/1 +gpub084:57371:57432 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/Socket/1 +gpub084:57371:57432 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub084:57371:57432 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub084:57371:57432 [1] NCCL INFO Connected all trees +gpub084:57371:57432 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub084:57371:57432 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:57371:57432 [1] NCCL INFO comm 0x1ac587c0 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +[gpub078:0/16] 2024-01-24 17:59:16,203 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub078:0/16] 2024-01-24 18:02:07,389 (trainer:737) INFO: 8epoch:train:1-100batch: iter_time=1.477, forward_time=0.521, loss_ctc=67.287, loss_att=64.664, acc=0.677, loss=65.451, backward_time=0.496, grad_norm=34.142, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.108, optim0_lr0=3.779e-04, train_time=7.868 +[gpub078:0/16] 2024-01-24 18:04:53,435 (trainer:737) INFO: 8epoch:train:101-200batch: iter_time=1.054e-04, forward_time=0.484, loss_ctc=55.465, loss_att=50.498, acc=0.711, loss=51.988, backward_time=0.524, grad_norm=48.514, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.109, optim0_lr0=3.777e-04, train_time=1.679 +[gpub078:0/16] 2024-01-24 18:08:13,440 (trainer:737) INFO: 8epoch:train:201-300batch: iter_time=2.230e-04, forward_time=0.359, loss_ctc=64.640, loss_att=74.201, acc=0.652, loss=71.333, backward_time=0.477, grad_norm=33.516, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.099, optim0_lr0=3.775e-04, train_time=2.000 +[gpub078:0/16] 2024-01-24 18:13:09,466 (trainer:737) INFO: 8epoch:train:301-400batch: iter_time=1.038e-04, forward_time=0.519, loss_ctc=57.852, loss_att=58.957, acc=0.672, loss=58.626, backward_time=0.541, grad_norm=29.836, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.109, optim0_lr0=3.773e-04, train_time=2.960 +[gpub078:0/16] 2024-01-24 18:16:06,508 (trainer:737) INFO: 8epoch:train:401-500batch: iter_time=1.108e-04, forward_time=0.427, loss_ctc=64.955, loss_att=58.926, acc=0.696, loss=60.735, backward_time=0.479, grad_norm=30.472, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.097, optim0_lr0=3.772e-04, train_time=1.771 +[gpub078:0/16] 2024-01-24 18:19:43,214 (trainer:737) INFO: 8epoch:train:501-600batch: iter_time=1.000e-04, forward_time=0.492, loss_ctc=58.232, loss_att=56.385, acc=0.694, loss=56.939, backward_time=0.460, grad_norm=30.055, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.109, optim0_lr0=3.770e-04, train_time=2.167 +[gpub078:0/16] 2024-01-24 18:23:34,149 (trainer:737) INFO: 
8epoch:train:601-700batch: iter_time=9.105e-05, forward_time=0.378, loss_ctc=66.231, loss_att=65.444, acc=0.661, loss=65.680, backward_time=0.463, grad_norm=31.659, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.108, optim0_lr0=3.768e-04, train_time=2.309 +[gpub078:0/16] 2024-01-24 18:27:40,469 (trainer:737) INFO: 8epoch:train:701-800batch: iter_time=8.991e-05, forward_time=0.461, loss_ctc=77.988, loss_att=77.499, acc=0.670, loss=77.646, backward_time=0.506, grad_norm=39.467, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.106, optim0_lr0=3.766e-04, train_time=2.463 +[gpub078:0/16] 2024-01-24 18:31:43,331 (trainer:737) INFO: 8epoch:train:801-900batch: iter_time=3.867e-04, forward_time=0.391, loss_ctc=68.415, loss_att=74.714, acc=0.689, loss=72.824, backward_time=0.468, grad_norm=32.131, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.099, optim0_lr0=3.764e-04, train_time=2.429 +[gpub078:0/16] 2024-01-24 18:35:22,493 (trainer:737) INFO: 8epoch:train:901-1000batch: iter_time=9.297e-05, forward_time=0.483, loss_ctc=69.085, loss_att=66.656, acc=0.664, loss=67.385, backward_time=0.445, grad_norm=36.685, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.103, optim0_lr0=3.763e-04, train_time=2.191 +[gpub078:0/16] 2024-01-24 18:38:56,291 (trainer:737) INFO: 8epoch:train:1001-1100batch: iter_time=5.010e-04, forward_time=0.434, loss_ctc=60.066, loss_att=55.432, acc=0.691, loss=56.822, backward_time=0.500, grad_norm=27.324, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.107, optim0_lr0=3.761e-04, train_time=2.136 +[gpub078:0/16] 2024-01-24 18:43:15,704 (trainer:737) INFO: 8epoch:train:1101-1200batch: iter_time=9.201e-04, forward_time=0.533, loss_ctc=67.036, loss_att=60.037, acc=0.688, loss=62.137, backward_time=0.538, grad_norm=31.309, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.106, optim0_lr0=3.759e-04, train_time=2.595 +[gpub078:0/16] 2024-01-24 18:45:40,635 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub078:0/16] 2024-01-24 18:45:59,497 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 18:46:03,141 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 18:46:03,141 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub078:0/16] 2024-01-24 18:46:03,145 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-24 18:59:16,301 (trainer:737) INFO: 8epoch:train:1201-1300batch: iter_time=6.808, forward_time=0.683, loss_ctc=64.050, loss_att=72.417, acc=0.652, loss=69.907, backward_time=0.459, grad_norm=33.530, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.099, optim0_lr0=3.757e-04, train_time=9.606 +[gpub078:0/16] 2024-01-24 19:02:37,385 (trainer:737) INFO: 8epoch:train:1301-1400batch: iter_time=8.733e-05, forward_time=0.289, loss_ctc=59.245, loss_att=53.995, acc=0.690, loss=55.570, backward_time=0.399, grad_norm=31.908, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.092, optim0_lr0=3.756e-04, train_time=2.011 +[gpub078:0/16] 2024-01-24 19:07:56,243 (trainer:737) INFO: 8epoch:train:1401-1500batch: iter_time=8.794e-05, forward_time=0.662, loss_ctc=63.039, loss_att=69.598, acc=0.660, loss=67.630, backward_time=0.541, grad_norm=28.646, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=3.754e-04, train_time=3.188 +[gpub078:0/16] 2024-01-24 19:11:19,262 (trainer:737) INFO: 
8epoch:train:1501-1600batch: iter_time=9.274e-05, forward_time=0.287, loss_ctc=55.803, loss_att=57.941, acc=0.679, loss=57.300, backward_time=0.418, grad_norm=26.454, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.102, optim0_lr0=3.752e-04, train_time=2.030 +[gpub078:0/16] 2024-01-24 19:15:42,578 (trainer:737) INFO: 8epoch:train:1601-1700batch: iter_time=1.016e-04, forward_time=0.570, loss_ctc=58.775, loss_att=58.463, acc=0.672, loss=58.556, backward_time=0.500, grad_norm=28.391, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.107, optim0_lr0=3.750e-04, train_time=2.632 +[gpub078:0/16] 2024-01-24 19:19:58,941 (trainer:737) INFO: 8epoch:train:1701-1800batch: iter_time=8.886e-05, forward_time=0.561, loss_ctc=67.197, loss_att=62.810, acc=0.688, loss=64.126, backward_time=0.627, grad_norm=32.377, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.134, optim0_lr0=3.749e-04, train_time=2.564 +[gpub078:0/16] 2024-01-24 19:23:54,548 (trainer:737) INFO: 8epoch:train:1801-1900batch: iter_time=0.001, forward_time=0.492, loss_ctc=54.172, loss_att=55.433, acc=0.671, loss=55.054, backward_time=0.525, grad_norm=27.285, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.137, optim0_lr0=3.747e-04, train_time=2.355 +[gpub078:0/16] 2024-01-24 19:28:55,528 (trainer:737) INFO: 8epoch:train:1901-2000batch: iter_time=0.001, forward_time=0.644, loss_ctc=72.970, loss_att=71.812, acc=0.658, loss=72.160, backward_time=0.519, grad_norm=34.859, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.147, optim0_lr0=3.745e-04, train_time=3.009 +[gpub078:0/16] 2024-01-24 19:33:14,512 (trainer:737) INFO: 8epoch:train:2001-2100batch: iter_time=5.994e-04, forward_time=0.476, loss_ctc=70.665, loss_att=83.147, acc=0.652, loss=79.402, backward_time=0.526, grad_norm=34.060, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.123, optim0_lr0=3.743e-04, train_time=2.592 +[gpub078:0/16] 2024-01-24 19:37:39,961 (trainer:737) INFO: 8epoch:train:2101-2200batch: iter_time=5.882e-04, 
forward_time=0.490, loss_ctc=66.969, loss_att=67.657, acc=0.671, loss=67.451, backward_time=0.443, grad_norm=30.973, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.136, optim0_lr0=3.742e-04, train_time=2.652 +[gpub078:0/16] 2024-01-24 19:41:46,567 (trainer:737) INFO: 8epoch:train:2201-2300batch: iter_time=0.001, forward_time=0.350, loss_ctc=65.029, loss_att=60.496, acc=0.666, loss=61.856, backward_time=0.453, grad_norm=34.476, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.101, optim0_lr0=3.740e-04, train_time=2.468 +[gpub078:0/16] 2024-01-24 19:45:47,438 (trainer:737) INFO: 8epoch:train:2301-2400batch: iter_time=8.167e-05, forward_time=0.535, loss_ctc=61.928, loss_att=53.898, acc=0.679, loss=56.307, backward_time=0.559, grad_norm=29.532, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.121, optim0_lr0=3.738e-04, train_time=2.408 +[gpub078:0/16] 2024-01-24 19:49:26,629 (trainer:737) INFO: 8epoch:train:2401-2500batch: iter_time=1.275e-04, forward_time=0.605, loss_ctc=63.961, loss_att=63.608, acc=0.677, loss=63.714, backward_time=0.484, grad_norm=31.163, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.119, optim0_lr0=3.736e-04, train_time=2.192 +[gpub078:0/16] 2024-01-24 19:49:46,694 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub078:0/16] 2024-01-24 19:50:05,368 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 19:50:08,894 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 19:50:08,894 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub078:0/16] 2024-01-24 19:50:08,898 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-24 20:03:03,411 (trainer:737) INFO: 8epoch:train:2501-2600batch: iter_time=6.147, forward_time=0.291, loss_ctc=65.172, loss_att=65.587, acc=0.673, loss=65.462, backward_time=0.405, grad_norm=34.929, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.092, optim0_lr0=3.735e-04, train_time=8.168 +[gpub078:0/16] 2024-01-24 20:06:36,862 (trainer:737) INFO: 8epoch:train:2601-2700batch: iter_time=7.638e-05, forward_time=0.289, loss_ctc=54.311, loss_att=50.899, acc=0.714, loss=51.923, backward_time=0.401, grad_norm=26.252, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.092, optim0_lr0=3.733e-04, train_time=2.134 +[gpub078:0/16] 2024-01-24 20:09:33,896 (trainer:737) INFO: 8epoch:train:2701-2800batch: iter_time=7.971e-05, forward_time=0.431, loss_ctc=62.439, loss_att=72.873, acc=0.658, loss=69.743, backward_time=0.499, grad_norm=30.887, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.109, optim0_lr0=3.731e-04, train_time=1.769 +[gpub078:0/16] 2024-01-24 20:12:03,897 (trainer:737) INFO: 
8epoch:train:2801-2900batch: iter_time=7.948e-05, forward_time=0.426, loss_ctc=56.383, loss_att=58.784, acc=0.677, loss=58.064, backward_time=0.426, grad_norm=28.181, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.093, optim0_lr0=3.729e-04, train_time=1.501 +[gpub078:0/16] 2024-01-24 20:15:55,893 (trainer:737) INFO: 8epoch:train:2901-3000batch: iter_time=7.822e-05, forward_time=0.290, loss_ctc=64.191, loss_att=58.181, acc=0.701, loss=59.984, backward_time=0.402, grad_norm=28.721, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.092, optim0_lr0=3.728e-04, train_time=2.320 +[gpub078:0/16] 2024-01-24 20:18:46,715 (trainer:737) INFO: 8epoch:train:3001-3100batch: iter_time=8.864e-05, forward_time=0.288, loss_ctc=55.397, loss_att=55.235, acc=0.700, loss=55.284, backward_time=0.399, grad_norm=29.011, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.726e-04, train_time=1.708 +[gpub078:0/16] 2024-01-24 20:21:12,541 (trainer:737) INFO: 8epoch:train:3101-3200batch: iter_time=7.855e-05, forward_time=0.296, loss_ctc=63.677, loss_att=64.910, acc=0.668, loss=64.540, backward_time=0.408, grad_norm=30.250, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.102, optim0_lr0=3.724e-04, train_time=1.458 +[gpub078:0/16] 2024-01-24 20:24:16,315 (trainer:737) INFO: 8epoch:train:3201-3300batch: iter_time=7.550e-05, forward_time=0.499, loss_ctc=75.495, loss_att=77.240, acc=0.673, loss=76.717, backward_time=0.474, grad_norm=36.985, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.109, optim0_lr0=3.722e-04, train_time=1.836 +[gpub078:0/16] 2024-01-24 20:27:21,444 (trainer:737) INFO: 8epoch:train:3301-3400batch: iter_time=8.200e-05, forward_time=0.293, loss_ctc=66.213, loss_att=73.181, acc=0.697, loss=71.091, backward_time=0.407, grad_norm=30.014, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.091, optim0_lr0=3.721e-04, train_time=1.852 +[gpub078:0/16] 2024-01-24 20:29:55,186 (trainer:737) INFO: 8epoch:train:3401-3500batch: iter_time=8.005e-05, 
forward_time=0.288, loss_ctc=67.050, loss_att=64.978, acc=0.673, loss=65.599, backward_time=0.403, grad_norm=35.110, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.091, optim0_lr0=3.719e-04, train_time=1.537 +[gpub078:0/16] 2024-01-24 20:32:42,044 (trainer:737) INFO: 8epoch:train:3501-3600batch: iter_time=2.349e-04, forward_time=0.303, loss_ctc=58.765, loss_att=54.776, acc=0.698, loss=55.973, backward_time=0.412, grad_norm=29.936, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.097, optim0_lr0=3.717e-04, train_time=1.667 +[gpub078:0/16] 2024-01-24 20:36:02,894 (trainer:737) INFO: 8epoch:train:3601-3700batch: iter_time=8.695e-05, forward_time=0.563, loss_ctc=64.268, loss_att=58.070, acc=0.695, loss=59.930, backward_time=0.543, grad_norm=29.843, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.104, optim0_lr0=3.716e-04, train_time=2.010 +[gpub078:0/16] 2024-01-24 20:37:29,566 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub078:0/16] 2024-01-24 20:37:48,529 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 20:37:52,045 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 20:37:52,045 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub078:0/16] 2024-01-24 20:37:52,259 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-24 20:51:39,541 (trainer:737) INFO: 
8epoch:train:3701-3800batch: iter_time=7.204, forward_time=0.289, loss_ctc=62.626, loss_att=69.753, acc=0.658, loss=67.615, backward_time=0.405, grad_norm=32.949, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.714e-04, train_time=9.366 +[gpub078:0/16] 2024-01-24 20:54:25,932 (trainer:737) INFO: 8epoch:train:3801-3900batch: iter_time=8.709e-05, forward_time=0.288, loss_ctc=56.942, loss_att=52.250, acc=0.693, loss=53.658, backward_time=0.402, grad_norm=28.850, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.712e-04, train_time=1.664 +[gpub078:0/16] 2024-01-24 20:58:23,326 (trainer:737) INFO: 8epoch:train:3901-4000batch: iter_time=8.319e-05, forward_time=0.551, loss_ctc=62.313, loss_att=67.699, acc=0.668, loss=66.083, backward_time=0.518, grad_norm=29.697, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.121, optim0_lr0=3.710e-04, train_time=2.374 +[gpub078:0/16] 2024-01-24 21:00:47,095 (trainer:737) INFO: 8epoch:train:4001-4100batch: iter_time=8.071e-05, forward_time=0.338, loss_ctc=54.975, loss_att=57.356, acc=0.680, loss=56.641, backward_time=0.419, grad_norm=26.714, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.095, optim0_lr0=3.709e-04, train_time=1.437 +[gpub078:0/16] 2024-01-24 21:03:57,527 (trainer:737) INFO: 8epoch:train:4101-4200batch: iter_time=8.280e-05, forward_time=0.287, loss_ctc=58.126, loss_att=57.197, acc=0.676, loss=57.476, backward_time=0.400, grad_norm=29.570, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.707e-04, train_time=1.904 +[gpub078:0/16] 2024-01-24 21:06:50,470 (trainer:737) INFO: 8epoch:train:4201-4300batch: iter_time=8.077e-05, forward_time=0.290, loss_ctc=67.802, loss_att=62.084, acc=0.691, loss=63.800, backward_time=0.403, grad_norm=31.941, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.705e-04, train_time=1.730 +[gpub078:0/16] 2024-01-24 21:10:53,608 (trainer:737) INFO: 8epoch:train:4301-4400batch: iter_time=9.156e-05, 
forward_time=0.510, loss_ctc=53.108, loss_att=53.714, acc=0.678, loss=53.532, backward_time=0.583, grad_norm=26.781, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.109, optim0_lr0=3.704e-04, train_time=2.431 +[gpub078:0/16] 2024-01-24 21:13:25,227 (trainer:737) INFO: 8epoch:train:4401-4500batch: iter_time=8.395e-05, forward_time=0.317, loss_ctc=71.335, loss_att=70.949, acc=0.663, loss=71.065, backward_time=0.429, grad_norm=34.456, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.093, optim0_lr0=3.702e-04, train_time=1.516 +[gpub078:0/16] 2024-01-24 21:16:48,330 (trainer:737) INFO: 8epoch:train:4501-4600batch: iter_time=8.822e-05, forward_time=0.290, loss_ctc=69.839, loss_att=81.505, acc=0.657, loss=78.005, backward_time=0.405, grad_norm=36.561, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.700e-04, train_time=2.029 +[gpub078:0/16] 2024-01-24 21:19:43,164 (trainer:737) INFO: 8epoch:train:4601-4700batch: iter_time=1.006e-04, forward_time=0.288, loss_ctc=66.742, loss_att=66.865, acc=0.675, loss=66.828, backward_time=0.403, grad_norm=31.087, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.699e-04, train_time=1.749 +[gpub078:0/16] 2024-01-24 21:23:16,349 (trainer:737) INFO: 8epoch:train:4701-4800batch: iter_time=9.956e-05, forward_time=0.479, loss_ctc=62.532, loss_att=59.713, acc=0.669, loss=60.559, backward_time=0.462, grad_norm=31.152, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.122, optim0_lr0=3.697e-04, train_time=2.132 +[gpub078:0/16] 2024-01-24 21:25:40,300 (trainer:737) INFO: 8epoch:train:4801-4900batch: iter_time=9.257e-05, forward_time=0.303, loss_ctc=60.327, loss_att=53.209, acc=0.685, loss=55.345, backward_time=0.413, grad_norm=30.315, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.100, optim0_lr0=3.695e-04, train_time=1.439 +[gpub078:0/16] 2024-01-24 21:28:12,148 (trainer:737) INFO: 8epoch:train:4901-5000batch: iter_time=7.991e-05, forward_time=0.291, loss_ctc=62.789, 
loss_att=62.919, acc=0.682, loss=62.880, backward_time=0.403, grad_norm=30.866, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.092, optim0_lr0=3.694e-04, train_time=1.518 +[gpub078:0/16] 2024-01-24 21:28:32,495 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub078:0/16] 2024-01-24 21:28:50,764 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 21:28:54,281 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 21:28:54,281 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub078:0/16] 2024-01-24 21:28:54,367 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-24 21:40:30,517 (trainer:737) INFO: 8epoch:train:5001-5100batch: iter_time=5.588, forward_time=0.292, loss_ctc=63.033, loss_att=64.484, acc=0.678, loss=64.049, backward_time=0.405, grad_norm=32.837, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.093, optim0_lr0=3.692e-04, train_time=7.384 +[gpub078:0/16] 2024-01-24 21:43:33,166 (trainer:737) INFO: 8epoch:train:5101-5200batch: iter_time=7.702e-05, forward_time=0.640, loss_ctc=53.142, loss_att=49.888, acc=0.719, loss=50.864, backward_time=0.501, grad_norm=25.332, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.125, optim0_lr0=3.690e-04, train_time=1.826 +[gpub078:0/16] 2024-01-24 21:46:57,140 (trainer:737) INFO: 8epoch:train:5201-5300batch: 
iter_time=8.310e-05, forward_time=0.289, loss_ctc=61.762, loss_att=73.108, acc=0.661, loss=69.704, backward_time=0.406, grad_norm=30.767, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.689e-04, train_time=2.040 +[gpub078:0/16] 2024-01-24 21:49:53,787 (trainer:737) INFO: 8epoch:train:5301-5400batch: iter_time=8.817e-05, forward_time=0.290, loss_ctc=55.343, loss_att=57.299, acc=0.682, loss=56.712, backward_time=0.412, grad_norm=26.913, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.102, optim0_lr0=3.687e-04, train_time=1.766 +[gpub078:0/16] 2024-01-24 21:53:19,031 (trainer:737) INFO: 8epoch:train:5401-5500batch: iter_time=8.647e-05, forward_time=0.291, loss_ctc=62.724, loss_att=57.497, acc=0.705, loss=59.065, backward_time=0.404, grad_norm=28.239, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.685e-04, train_time=2.052 +[gpub078:0/16] 2024-01-24 21:57:18,559 (trainer:737) INFO: 8epoch:train:5501-5600batch: iter_time=0.060, forward_time=0.715, loss_ctc=54.590, loss_att=54.446, acc=0.703, loss=54.489, backward_time=0.632, grad_norm=27.886, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.128, optim0_lr0=3.684e-04, train_time=2.395 +[gpub078:0/16] 2024-01-24 22:01:22,134 (trainer:737) INFO: 8epoch:train:5601-5700batch: iter_time=8.489e-05, forward_time=0.290, loss_ctc=63.190, loss_att=63.838, acc=0.671, loss=63.644, backward_time=0.404, grad_norm=29.174, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.682e-04, train_time=2.436 +[gpub078:0/16] 2024-01-24 22:03:50,158 (trainer:737) INFO: 8epoch:train:5701-5800batch: iter_time=8.385e-05, forward_time=0.325, loss_ctc=74.278, loss_att=75.846, acc=0.679, loss=75.376, backward_time=0.409, grad_norm=34.467, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.093, optim0_lr0=3.680e-04, train_time=1.480 +[gpub078:0/16] 2024-01-24 22:06:51,253 (trainer:737) INFO: 8epoch:train:5801-5900batch: iter_time=8.697e-05, forward_time=0.316, 
loss_ctc=64.992, loss_att=72.950, acc=0.697, loss=70.562, backward_time=0.411, grad_norm=30.582, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.093, optim0_lr0=3.679e-04, train_time=1.811 +[gpub078:0/16] 2024-01-24 22:10:49,115 (trainer:737) INFO: 8epoch:train:5901-6000batch: iter_time=7.957e-05, forward_time=0.554, loss_ctc=65.010, loss_att=64.843, acc=0.673, loss=64.893, backward_time=0.492, grad_norm=30.853, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.114, optim0_lr0=3.677e-04, train_time=2.378 +[gpub078:0/16] 2024-01-24 22:13:53,593 (trainer:737) INFO: 8epoch:train:6001-6100batch: iter_time=7.793e-05, forward_time=0.290, loss_ctc=58.423, loss_att=55.180, acc=0.698, loss=56.153, backward_time=0.401, grad_norm=27.700, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.675e-04, train_time=1.845 +[gpub078:0/16] 2024-01-24 22:16:15,516 (trainer:737) INFO: 8epoch:train:6101-6200batch: iter_time=7.967e-05, forward_time=0.291, loss_ctc=63.547, loss_att=57.439, acc=0.698, loss=59.272, backward_time=0.429, grad_norm=29.217, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.105, optim0_lr0=3.674e-04, train_time=1.419 +[gpub078:0/16] 2024-01-24 22:18:16,718 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub078:0/16] 2024-01-24 22:18:35,575 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 22:18:39,464 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 22:18:39,464 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub078:0/16] 2024-01-24 22:18:39,467 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-24 22:30:41,898 (trainer:737) INFO: 8epoch:train:6201-6300batch: iter_time=6.486, forward_time=0.580, loss_ctc=61.941, loss_att=68.623, acc=0.662, loss=66.618, backward_time=0.455, grad_norm=33.007, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.101, optim0_lr0=3.672e-04, train_time=8.664 +[gpub078:0/16] 2024-01-24 22:33:12,010 (trainer:737) INFO: 8epoch:train:6301-6400batch: iter_time=7.192e-05, forward_time=0.287, loss_ctc=55.918, loss_att=51.508, acc=0.696, loss=52.831, backward_time=0.399, grad_norm=29.252, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.670e-04, train_time=1.501 +[gpub078:0/16] 2024-01-24 22:36:09,848 (trainer:737) INFO: 8epoch:train:6401-6500batch: iter_time=8.064e-05, forward_time=0.288, loss_ctc=61.422, loss_att=66.618, acc=0.672, loss=65.060, backward_time=0.401, grad_norm=28.147, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.669e-04, train_time=1.778 +[gpub078:0/16] 2024-01-24 22:39:53,381 (trainer:737) INFO: 
8epoch:train:6501-6600batch: iter_time=7.778e-05, forward_time=0.295, loss_ctc=53.722, loss_att=56.005, acc=0.688, loss=55.320, backward_time=0.410, grad_norm=26.435, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.097, optim0_lr0=3.667e-04, train_time=2.234 +[gpub078:0/16] 2024-01-24 22:43:08,762 (trainer:737) INFO: 8epoch:train:6601-6700batch: iter_time=8.299e-05, forward_time=0.598, loss_ctc=57.296, loss_att=56.583, acc=0.681, loss=56.797, backward_time=0.476, grad_norm=28.002, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.122, optim0_lr0=3.665e-04, train_time=1.954 +[gpub078:0/16] 2024-01-24 22:46:03,883 (trainer:737) INFO: 8epoch:train:6701-6800batch: iter_time=8.820e-05, forward_time=0.289, loss_ctc=65.013, loss_att=61.416, acc=0.694, loss=62.495, backward_time=0.404, grad_norm=32.354, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.664e-04, train_time=1.751 +[gpub078:0/16] 2024-01-24 22:48:58,011 (trainer:737) INFO: 8epoch:train:6801-6900batch: iter_time=8.057e-05, forward_time=0.295, loss_ctc=52.577, loss_att=53.208, acc=0.681, loss=53.019, backward_time=0.398, grad_norm=26.422, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.092, optim0_lr0=3.662e-04, train_time=1.741 +[gpub078:0/16] 2024-01-24 22:51:51,233 (trainer:737) INFO: 8epoch:train:6901-7000batch: iter_time=7.895e-05, forward_time=0.292, loss_ctc=70.545, loss_att=69.654, acc=0.668, loss=69.922, backward_time=0.433, grad_norm=32.569, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.098, optim0_lr0=3.660e-04, train_time=1.732 +[gpub078:0/16] 2024-01-24 22:55:02,608 (trainer:737) INFO: 8epoch:train:7001-7100batch: iter_time=7.890e-05, forward_time=0.541, loss_ctc=69.239, loss_att=80.677, acc=0.662, loss=77.246, backward_time=0.486, grad_norm=31.885, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.103, optim0_lr0=3.659e-04, train_time=1.912 +[gpub078:0/16] 2024-01-24 22:58:12,161 (trainer:737) INFO: 8epoch:train:7101-7200batch: iter_time=8.230e-05, 
forward_time=0.289, loss_ctc=65.159, loss_att=65.849, acc=0.680, loss=65.642, backward_time=0.402, grad_norm=30.445, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.657e-04, train_time=1.897 +[gpub078:0/16] 2024-01-24 23:00:30,605 (trainer:737) INFO: 8epoch:train:7201-7300batch: iter_time=8.119e-05, forward_time=0.292, loss_ctc=63.188, loss_att=59.080, acc=0.672, loss=60.312, backward_time=0.417, grad_norm=35.145, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.097, optim0_lr0=3.656e-04, train_time=1.384 +[gpub078:0/16] 2024-01-24 23:03:38,737 (trainer:737) INFO: 8epoch:train:7301-7400batch: iter_time=3.951e-04, forward_time=0.505, loss_ctc=58.899, loss_att=52.156, acc=0.688, loss=54.179, backward_time=0.466, grad_norm=29.318, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.106, optim0_lr0=3.654e-04, train_time=1.880 +[gpub078:0/16] 2024-01-24 23:06:57,923 (trainer:737) INFO: 8epoch:train:7401-7500batch: iter_time=7.726e-05, forward_time=0.291, loss_ctc=62.314, loss_att=61.999, acc=0.684, loss=62.093, backward_time=0.398, grad_norm=31.437, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.652e-04, train_time=1.992 +[gpub078:0/16] 2024-01-24 23:07:18,449 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub078:0/16] 2024-01-24 23:07:36,937 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 23:07:40,334 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 23:07:40,334 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub078:0/16] 2024-01-24 23:07:40,337 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-24 23:20:40,977 (trainer:737) INFO: 8epoch:train:7501-7600batch: iter_time=6.699, forward_time=0.304, loss_ctc=62.811, loss_att=60.300, acc=0.676, loss=61.053, backward_time=0.407, grad_norm=33.122, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.095, optim0_lr0=3.651e-04, train_time=8.231 +[gpub078:0/16] 2024-01-24 23:24:07,346 (trainer:737) INFO: 8epoch:train:7601-7700batch: iter_time=0.016, forward_time=0.597, loss_ctc=52.814, loss_att=48.227, acc=0.715, loss=49.603, backward_time=0.513, grad_norm=25.456, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.115, optim0_lr0=3.649e-04, train_time=2.063 +[gpub078:0/16] 2024-01-24 23:28:50,579 (trainer:737) INFO: 8epoch:train:7701-7800batch: iter_time=8.963e-05, forward_time=0.288, loss_ctc=61.499, loss_att=71.298, acc=0.655, loss=68.359, backward_time=0.402, grad_norm=30.173, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.647e-04, train_time=2.831 +[gpub078:0/16] 2024-01-24 23:31:09,546 (trainer:737) INFO: 
8epoch:train:7801-7900batch: iter_time=9.935e-05, forward_time=0.287, loss_ctc=54.465, loss_att=54.471, acc=0.684, loss=54.469, backward_time=0.404, grad_norm=27.970, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.103, optim0_lr0=3.646e-04, train_time=1.391 +[gpub078:0/16] 2024-01-24 23:34:46,284 (trainer:737) INFO: 8epoch:train:7901-8000batch: iter_time=1.000e-04, forward_time=0.570, loss_ctc=62.655, loss_att=56.134, acc=0.700, loss=58.090, backward_time=0.522, grad_norm=30.269, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.110, optim0_lr0=3.644e-04, train_time=2.167 +[gpub078:0/16] 2024-01-24 23:37:56,352 (trainer:737) INFO: 8epoch:train:8001-8100batch: iter_time=1.062e-04, forward_time=0.288, loss_ctc=53.950, loss_att=54.649, acc=0.690, loss=54.439, backward_time=0.403, grad_norm=29.826, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.643e-04, train_time=1.897 +[gpub078:0/16] 2024-01-24 23:41:10,528 (trainer:737) INFO: 8epoch:train:8101-8200batch: iter_time=9.878e-05, forward_time=0.290, loss_ctc=62.726, loss_att=63.608, acc=0.670, loss=63.343, backward_time=0.399, grad_norm=30.855, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.641e-04, train_time=1.941 +[gpub078:0/16] 2024-01-24 23:44:20,599 (trainer:737) INFO: 8epoch:train:8201-8300batch: iter_time=9.591e-05, forward_time=0.294, loss_ctc=73.559, loss_att=73.909, acc=0.671, loss=73.804, backward_time=0.426, grad_norm=35.185, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.098, optim0_lr0=3.639e-04, train_time=1.904 +[gpub078:0/16] 2024-01-24 23:47:40,342 (trainer:737) INFO: 8epoch:train:8301-8400batch: iter_time=9.456e-05, forward_time=0.674, loss_ctc=65.220, loss_att=72.429, acc=0.684, loss=70.266, backward_time=0.508, grad_norm=31.104, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.115, optim0_lr0=3.638e-04, train_time=1.997 +[gpub078:0/16] 2024-01-24 23:50:35,964 (trainer:737) INFO: 8epoch:train:8401-8500batch: iter_time=9.248e-05, 
forward_time=0.288, loss_ctc=65.308, loss_att=63.566, acc=0.666, loss=64.089, backward_time=0.403, grad_norm=33.469, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.636e-04, train_time=1.753 +[gpub078:0/16] 2024-01-24 23:54:29,864 (trainer:737) INFO: 8epoch:train:8501-8600batch: iter_time=9.965e-05, forward_time=0.289, loss_ctc=57.362, loss_att=53.344, acc=0.697, loss=54.549, backward_time=0.396, grad_norm=28.308, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.635e-04, train_time=2.342 +[gpub078:0/16] 2024-01-24 23:57:00,369 (trainer:737) INFO: 8epoch:train:8601-8700batch: iter_time=3.252e-04, forward_time=0.310, loss_ctc=62.829, loss_att=57.121, acc=0.693, loss=58.833, backward_time=0.408, grad_norm=29.109, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.098, optim0_lr0=3.633e-04, train_time=1.505 +[gpub078:0/16] 2024-01-24 23:59:09,977 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub078:0/16] 2024-01-24 23:59:28,651 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-24 23:59:32,300 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-24 23:59:32,300 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub078:0/16] 2024-01-24 23:59:32,340 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 00:11:19,438 (trainer:737) INFO: 
8epoch:train:8701-8800batch: iter_time=6.534, forward_time=0.548, loss_ctc=60.834, loss_att=68.592, acc=0.670, loss=66.265, backward_time=0.470, grad_norm=33.352, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.105, optim0_lr0=3.631e-04, train_time=8.590 +[gpub078:0/16] 2024-01-25 00:14:10,540 (trainer:737) INFO: 8epoch:train:8801-8900batch: iter_time=8.277e-05, forward_time=0.288, loss_ctc=55.472, loss_att=51.518, acc=0.710, loss=52.704, backward_time=0.401, grad_norm=32.390, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.092, optim0_lr0=3.630e-04, train_time=1.711 +[gpub078:0/16] 2024-01-25 00:17:25,268 (trainer:737) INFO: 8epoch:train:8901-9000batch: iter_time=8.526e-05, forward_time=0.292, loss_ctc=60.573, loss_att=67.499, acc=0.679, loss=65.421, backward_time=0.414, grad_norm=28.087, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.100, optim0_lr0=3.628e-04, train_time=1.947 +[gpub078:0/16] 2024-01-25 00:20:54,382 (trainer:737) INFO: 8epoch:train:9001-9100batch: iter_time=8.638e-05, forward_time=0.560, loss_ctc=53.441, loss_att=56.378, acc=0.692, loss=55.497, backward_time=0.484, grad_norm=26.422, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.109, optim0_lr0=3.627e-04, train_time=2.089 +[gpub078:0/16] 2024-01-25 00:23:37,900 (trainer:737) INFO: 8epoch:train:9101-9200batch: iter_time=8.396e-05, forward_time=0.290, loss_ctc=56.064, loss_att=57.980, acc=0.689, loss=57.405, backward_time=0.401, grad_norm=26.989, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.092, optim0_lr0=3.625e-04, train_time=1.637 +[gpub078:0/16] 2024-01-25 00:26:59,187 (trainer:737) INFO: 8epoch:train:9201-9300batch: iter_time=8.910e-05, forward_time=0.303, loss_ctc=63.853, loss_att=60.470, acc=0.710, loss=61.485, backward_time=0.438, grad_norm=33.214, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.097, optim0_lr0=3.623e-04, train_time=2.013 +[gpub078:0/16] 2024-01-25 00:30:56,390 (trainer:737) INFO: 8epoch:train:9301-9400batch: iter_time=8.871e-05, 
forward_time=0.569, loss_ctc=51.494, loss_att=52.538, acc=0.692, loss=52.225, backward_time=0.516, grad_norm=25.411, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.110, optim0_lr0=3.622e-04, train_time=2.371 +[gpub078:0/16] 2024-01-25 00:33:33,453 (trainer:737) INFO: 8epoch:train:9401-9500batch: iter_time=9.037e-05, forward_time=0.292, loss_ctc=69.245, loss_att=69.552, acc=0.680, loss=69.460, backward_time=0.406, grad_norm=31.566, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.092, optim0_lr0=3.620e-04, train_time=1.570 +[gpub078:0/16] 2024-01-25 00:36:28,235 (trainer:737) INFO: 8epoch:train:9501-9600batch: iter_time=9.372e-05, forward_time=0.297, loss_ctc=67.776, loss_att=79.661, acc=0.685, loss=76.095, backward_time=0.411, grad_norm=32.796, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.105, optim0_lr0=3.619e-04, train_time=1.749 +[gpub078:0/16] 2024-01-25 00:40:38,944 (trainer:737) INFO: 8epoch:train:9601-9700batch: iter_time=8.592e-05, forward_time=0.529, loss_ctc=64.299, loss_att=66.376, acc=0.690, loss=65.753, backward_time=0.496, grad_norm=29.311, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.109, optim0_lr0=3.617e-04, train_time=2.507 +[gpub078:0/16] 2024-01-25 00:42:55,191 (trainer:737) INFO: 8epoch:train:9701-9800batch: iter_time=7.615e-05, forward_time=0.290, loss_ctc=61.367, loss_att=58.931, acc=0.684, loss=59.662, backward_time=0.403, grad_norm=30.355, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.092, optim0_lr0=3.615e-04, train_time=1.361 +[gpub078:0/16] 2024-01-25 00:46:23,639 (trainer:737) INFO: 8epoch:train:9801-9900batch: iter_time=7.927e-05, forward_time=0.288, loss_ctc=58.096, loss_att=52.534, acc=0.695, loss=54.202, backward_time=0.398, grad_norm=30.514, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.095, optim0_lr0=3.614e-04, train_time=2.082 +[gpub078:0/16] 2024-01-25 00:49:58,496 (trainer:737) INFO: 8epoch:train:9901-10000batch: iter_time=4.377e-04, forward_time=0.614, loss_ctc=61.839, 
loss_att=61.462, acc=0.701, loss=61.575, backward_time=0.509, grad_norm=29.643, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.125, optim0_lr0=3.612e-04, train_time=2.150 +[gpub078:0/16] 2024-01-25 00:50:18,600 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub078:0/16] 2024-01-25 00:50:37,534 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 00:50:40,961 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 00:50:40,961 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub078:0/16] 2024-01-25 00:50:40,964 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 01:01:49,466 (trainer:737) INFO: 8epoch:train:10001-10100batch: iter_time=5.544, forward_time=0.339, loss_ctc=62.134, loss_att=61.416, acc=0.689, loss=61.631, backward_time=0.416, grad_norm=32.341, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.094, optim0_lr0=3.611e-04, train_time=7.110 +[gpub078:0/16] 2024-01-25 01:05:06,503 (trainer:737) INFO: 8epoch:train:10101-10200batch: iter_time=8.378e-05, forward_time=0.339, loss_ctc=52.402, loss_att=48.582, acc=0.725, loss=49.728, backward_time=0.405, grad_norm=25.259, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.100, optim0_lr0=3.609e-04, train_time=1.970 +[gpub078:0/16] 2024-01-25 01:07:54,067 (trainer:737) INFO: 8epoch:train:10201-10300batch: 
iter_time=1.270e-04, forward_time=0.421, loss_ctc=61.047, loss_att=71.359, acc=0.667, loss=68.265, backward_time=0.433, grad_norm=29.622, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.104, optim0_lr0=3.608e-04, train_time=1.675 +[gpub078:0/16] 2024-01-25 01:11:18,011 (trainer:737) INFO: 8epoch:train:10301-10400batch: iter_time=1.020e-04, forward_time=0.349, loss_ctc=54.472, loss_att=57.000, acc=0.685, loss=56.242, backward_time=0.426, grad_norm=30.589, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.097, optim0_lr0=3.606e-04, train_time=2.039 +[gpub078:0/16] 2024-01-25 01:14:58,948 (trainer:737) INFO: 8epoch:train:10401-10500batch: iter_time=1.055e-04, forward_time=0.336, loss_ctc=61.660, loss_att=56.253, acc=0.710, loss=57.875, backward_time=0.403, grad_norm=28.424, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.092, optim0_lr0=3.605e-04, train_time=2.208 +[gpub078:0/16] 2024-01-25 01:17:45,482 (trainer:737) INFO: 8epoch:train:10501-10600batch: iter_time=1.056e-04, forward_time=0.488, loss_ctc=56.365, loss_att=53.712, acc=0.709, loss=54.508, backward_time=0.454, grad_norm=29.520, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.111, optim0_lr0=3.603e-04, train_time=1.666 +[gpub078:0/16] 2024-01-25 01:20:18,081 (trainer:737) INFO: 8epoch:train:10601-10700batch: iter_time=1.074e-04, forward_time=0.312, loss_ctc=62.052, loss_att=63.588, acc=0.677, loss=63.127, backward_time=0.414, grad_norm=29.683, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.099, optim0_lr0=3.601e-04, train_time=1.526 +[gpub078:0/16] 2024-01-25 01:24:17,462 (trainer:737) INFO: 8epoch:train:10701-10800batch: iter_time=1.049e-04, forward_time=0.324, loss_ctc=72.880, loss_att=74.173, acc=0.684, loss=73.785, backward_time=0.423, grad_norm=33.507, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.097, optim0_lr0=3.600e-04, train_time=2.394 +[gpub078:0/16] 2024-01-25 01:26:38,082 (trainer:737) INFO: 8epoch:train:10801-10900batch: iter_time=1.043e-04, 
forward_time=0.304, loss_ctc=63.771, loss_att=71.333, acc=0.706, loss=69.064, backward_time=0.408, grad_norm=28.817, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.092, optim0_lr0=3.598e-04, train_time=1.405 +[gpub078:0/16] 2024-01-25 01:30:18,879 (trainer:737) INFO: 8epoch:train:10901-11000batch: iter_time=1.129e-04, forward_time=0.565, loss_ctc=64.777, loss_att=63.710, acc=0.681, loss=64.030, backward_time=0.496, grad_norm=33.086, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.098, optim0_lr0=3.597e-04, train_time=2.208 +[gpub078:0/16] 2024-01-25 01:33:28,182 (trainer:737) INFO: 8epoch:train:11001-11100batch: iter_time=6.660e-04, forward_time=0.327, loss_ctc=56.778, loss_att=53.678, acc=0.706, loss=54.608, backward_time=0.435, grad_norm=26.646, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.104, optim0_lr0=3.595e-04, train_time=1.891 +[gpub078:0/16] 2024-01-25 01:37:00,712 (trainer:737) INFO: 8epoch:train:11101-11200batch: iter_time=1.210e-04, forward_time=0.287, loss_ctc=61.886, loss_att=56.577, acc=0.702, loss=58.170, backward_time=0.400, grad_norm=29.821, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.094, optim0_lr0=3.594e-04, train_time=2.127 +[gpub078:0/16] 2024-01-25 01:38:52,617 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub078:0/16] 2024-01-25 01:39:11,416 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 01:39:14,948 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 01:39:14,948 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub078:0/16] 2024-01-25 01:39:14,971 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 01:48:05,115 (trainer:737) INFO: 8epoch:train:11201-11300batch: iter_time=4.831, forward_time=0.442, loss_ctc=60.038, loss_att=67.901, acc=0.668, loss=65.542, backward_time=0.425, grad_norm=31.619, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.095, optim0_lr0=3.592e-04, train_time=6.644 +[gpub078:0/16] 2024-01-25 01:51:26,815 (trainer:737) INFO: 8epoch:train:11301-11400batch: iter_time=2.415e-04, forward_time=0.335, loss_ctc=54.436, loss_att=50.228, acc=0.705, loss=51.491, backward_time=0.411, grad_norm=28.537, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.096, optim0_lr0=3.591e-04, train_time=2.013 +[gpub078:0/16] 2024-01-25 01:54:18,742 (trainer:737) INFO: 8epoch:train:11401-11500batch: iter_time=8.561e-05, forward_time=0.289, loss_ctc=59.821, loss_att=65.393, acc=0.678, loss=63.721, backward_time=0.400, grad_norm=28.882, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.091, optim0_lr0=3.589e-04, train_time=1.723 +[gpub078:0/16] 2024-01-25 01:56:42,279 (trainer:737) 
INFO: 8epoch:train:11501-11600batch: iter_time=8.373e-05, forward_time=0.348, loss_ctc=53.037, loss_att=55.442, acc=0.691, loss=54.721, backward_time=0.432, grad_norm=26.054, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.097, optim0_lr0=3.587e-04, train_time=1.435 +[gpub078:0/16] 2024-01-25 01:59:16,772 (trainer:737) INFO: 8epoch:train:11601-11700batch: iter_time=3.943e-04, forward_time=0.307, loss_ctc=55.823, loss_att=54.977, acc=0.687, loss=55.231, backward_time=0.413, grad_norm=28.236, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.095, optim0_lr0=3.586e-04, train_time=1.545 +[gpub078:0/16] 2024-01-25 02:01:39,723 (trainer:737) INFO: 8epoch:train:11701-11800batch: iter_time=8.632e-05, forward_time=0.295, loss_ctc=63.336, loss_att=60.078, acc=0.702, loss=61.055, backward_time=0.406, grad_norm=33.716, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.093, optim0_lr0=3.584e-04, train_time=1.428 +[gpub078:0/16] 2024-01-25 02:04:10,951 (trainer:737) INFO: 8epoch:train:11801-11900batch: iter_time=8.785e-05, forward_time=0.290, loss_ctc=51.488, loss_att=53.311, acc=0.685, loss=52.764, backward_time=0.399, grad_norm=26.423, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.092, optim0_lr0=3.583e-04, train_time=1.513 +[gpub078:0/16] 2024-01-25 02:07:33,284 (trainer:737) INFO: 8epoch:train:11901-12000batch: iter_time=0.059, forward_time=0.415, loss_ctc=68.773, loss_att=68.366, acc=0.674, loss=68.488, backward_time=0.430, grad_norm=32.145, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.093, optim0_lr0=3.581e-04, train_time=2.023 +[gpub078:0/16] 2024-01-25 02:10:04,405 (trainer:737) INFO: 8epoch:train:12001-12100batch: iter_time=5.194e-04, forward_time=0.310, loss_ctc=67.047, loss_att=79.097, acc=0.667, loss=75.482, backward_time=0.424, grad_norm=31.817, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.099, optim0_lr0=3.580e-04, train_time=1.511 +[gpub078:0/16] 2024-01-25 02:12:22,196 (trainer:737) INFO: 8epoch:train:12101-12200batch: 
iter_time=8.166e-05, forward_time=0.289, loss_ctc=63.841, loss_att=64.768, acc=0.687, loss=64.490, backward_time=0.402, grad_norm=30.122, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.092, optim0_lr0=3.578e-04, train_time=1.377 +[gpub078:0/16] 2024-01-25 02:15:11,231 (trainer:737) INFO: 8epoch:train:12201-12300batch: iter_time=8.638e-05, forward_time=0.288, loss_ctc=60.222, loss_att=57.120, acc=0.680, loss=58.050, backward_time=0.400, grad_norm=31.627, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.092, optim0_lr0=3.577e-04, train_time=1.691 +[gpub078:0/16] 2024-01-25 02:17:53,493 (trainer:737) INFO: 8epoch:train:12301-12400batch: iter_time=8.241e-05, forward_time=0.357, loss_ctc=58.536, loss_att=51.930, acc=0.693, loss=53.912, backward_time=0.470, grad_norm=29.847, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.103, optim0_lr0=3.575e-04, train_time=1.622 +[gpub078:0/16] 2024-01-25 02:20:19,380 (trainer:737) INFO: 8epoch:train:12401-12500batch: iter_time=8.710e-05, forward_time=0.310, loss_ctc=60.917, loss_att=61.058, acc=0.690, loss=61.016, backward_time=0.410, grad_norm=30.314, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.094, optim0_lr0=3.574e-04, train_time=1.459 +[gpub078:0/16] 2024-01-25 02:20:39,617 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub078:0/16] 2024-01-25 02:20:58,806 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 02:21:02,459 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 02:21:02,459 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub078:0/16] 2024-01-25 02:21:02,463 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 02:28:52,268 (trainer:737) INFO: 8epoch:train:12501-12600batch: iter_time=3.539, forward_time=0.292, loss_ctc=60.714, loss_att=59.880, acc=0.677, loss=60.130, backward_time=0.402, grad_norm=33.364, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.092, optim0_lr0=3.572e-04, train_time=5.128 +[gpub078:0/16] 2024-01-25 02:31:49,481 (trainer:737) INFO: 8epoch:train:12601-12700batch: iter_time=2.577e-04, forward_time=0.409, loss_ctc=51.817, loss_att=47.262, acc=0.722, loss=48.628, backward_time=0.412, grad_norm=25.554, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.103, optim0_lr0=3.571e-04, train_time=1.773 +[gpub078:0/16] 2024-01-25 02:34:03,809 (trainer:737) INFO: 8epoch:train:12701-12800batch: iter_time=8.729e-05, forward_time=0.303, loss_ctc=60.320, loss_att=70.076, acc=0.661, loss=67.149, backward_time=0.407, grad_norm=28.213, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.093, optim0_lr0=3.569e-04, train_time=1.342 +[gpub078:0/16] 2024-01-25 02:36:52,143 
(trainer:737) INFO: 8epoch:train:12801-12900batch: iter_time=9.992e-05, forward_time=0.286, loss_ctc=54.227, loss_att=54.101, acc=0.689, loss=54.139, backward_time=0.397, grad_norm=28.533, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.092, optim0_lr0=3.568e-04, train_time=1.684 +[gpub078:0/16] 2024-01-25 02:39:38,067 (trainer:737) INFO: 8epoch:train:12901-13000batch: iter_time=9.090e-05, forward_time=0.380, loss_ctc=61.217, loss_att=55.058, acc=0.706, loss=56.906, backward_time=0.432, grad_norm=27.487, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.097, optim0_lr0=3.566e-04, train_time=1.658 +[gpub078:0/16] 2024-01-25 02:42:07,142 (trainer:737) INFO: 8epoch:train:13001-13100batch: iter_time=8.999e-05, forward_time=0.322, loss_ctc=52.556, loss_att=53.660, acc=0.697, loss=53.329, backward_time=0.410, grad_norm=29.676, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.094, optim0_lr0=3.565e-04, train_time=1.491 +[gpub078:0/16] 2024-01-25 02:44:40,586 (trainer:737) INFO: 8epoch:train:13101-13200batch: iter_time=9.359e-05, forward_time=0.291, loss_ctc=60.911, loss_att=62.241, acc=0.677, loss=61.842, backward_time=0.404, grad_norm=29.782, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.092, optim0_lr0=3.563e-04, train_time=1.533 +[gpub078:0/16] 2024-01-25 02:47:49,508 (trainer:737) INFO: 8epoch:train:13201-13300batch: iter_time=9.399e-05, forward_time=0.379, loss_ctc=72.104, loss_att=73.335, acc=0.675, loss=72.966, backward_time=0.440, grad_norm=33.135, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.098, optim0_lr0=3.562e-04, train_time=1.890 +[gpub078:0/16] 2024-01-25 02:50:18,412 (trainer:737) INFO: 8epoch:train:13301-13400batch: iter_time=9.763e-05, forward_time=0.316, loss_ctc=63.212, loss_att=70.237, acc=0.692, loss=68.130, backward_time=0.413, grad_norm=30.153, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.098, optim0_lr0=3.560e-04, train_time=1.489 +[gpub078:0/16] 2024-01-25 02:52:57,054 (trainer:737) INFO: 
8epoch:train:13401-13500batch: iter_time=1.063e-04, forward_time=0.288, loss_ctc=63.805, loss_att=62.508, acc=0.671, loss=62.897, backward_time=0.399, grad_norm=33.353, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.092, optim0_lr0=3.559e-04, train_time=1.586 +[gpub078:0/16] 2024-01-25 02:55:35,952 (trainer:737) INFO: 8epoch:train:13501-13600batch: iter_time=5.124e-04, forward_time=0.394, loss_ctc=55.754, loss_att=53.019, acc=0.700, loss=53.839, backward_time=0.421, grad_norm=27.365, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.101, optim0_lr0=3.557e-04, train_time=1.587 +[gpub078:0/16] 2024-01-25 02:58:26,453 (trainer:737) INFO: 8epoch:train:13601-13700batch: iter_time=1.019e-04, forward_time=0.310, loss_ctc=61.690, loss_att=56.335, acc=0.699, loss=57.942, backward_time=0.409, grad_norm=31.652, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.102, optim0_lr0=3.556e-04, train_time=1.706 +[gpub078:0/16] 2024-01-25 03:00:10,614 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub078:0/16] 2024-01-25 03:00:29,167 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 03:00:32,644 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 03:00:32,644 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub078:0/16] 2024-01-25 03:00:32,721 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 03:07:08,485 (trainer:737) INFO: 8epoch:train:13701-13800batch: iter_time=3.544, forward_time=0.350, loss_ctc=59.593, loss_att=67.475, acc=0.674, loss=65.111, backward_time=0.416, grad_norm=31.910, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.092, optim0_lr0=3.554e-04, train_time=5.220 +[gpub078:0/16] 2024-01-25 03:09:28,655 (trainer:737) INFO: 8epoch:train:13801-13900batch: iter_time=8.069e-05, forward_time=0.326, loss_ctc=54.046, loss_att=50.474, acc=0.714, loss=51.546, backward_time=0.410, grad_norm=30.288, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.095, optim0_lr0=3.553e-04, train_time=1.401 +[gpub078:0/16] 2024-01-25 03:12:23,906 (trainer:737) INFO: 8epoch:train:13901-14000batch: iter_time=7.989e-05, forward_time=0.320, loss_ctc=59.267, loss_att=65.722, acc=0.687, loss=63.786, backward_time=0.411, grad_norm=28.137, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.098, optim0_lr0=3.551e-04, train_time=1.753 +[gpub078:0/16] 2024-01-25 03:15:07,094 (trainer:737) 
INFO: 8epoch:train:14001-14100batch: iter_time=9.019e-05, forward_time=0.336, loss_ctc=51.967, loss_att=54.981, acc=0.700, loss=54.077, backward_time=0.411, grad_norm=26.587, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.097, optim0_lr0=3.550e-04, train_time=1.632 +[gpub078:0/16] 2024-01-25 03:17:30,243 (trainer:737) INFO: 8epoch:train:14101-14200batch: iter_time=9.332e-05, forward_time=0.338, loss_ctc=55.440, loss_att=57.432, acc=0.690, loss=56.835, backward_time=0.409, grad_norm=28.564, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.096, optim0_lr0=3.548e-04, train_time=1.431 +[gpub078:0/16] 2024-01-25 03:20:12,200 (trainer:737) INFO: 8epoch:train:14201-14300batch: iter_time=8.946e-05, forward_time=0.292, loss_ctc=63.439, loss_att=60.127, acc=0.713, loss=61.121, backward_time=0.406, grad_norm=30.423, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.093, optim0_lr0=3.547e-04, train_time=1.619 +[gpub078:0/16] 2024-01-25 03:22:56,515 (trainer:737) INFO: 8epoch:train:14301-14400batch: iter_time=8.708e-05, forward_time=0.317, loss_ctc=50.858, loss_att=52.369, acc=0.698, loss=51.916, backward_time=0.410, grad_norm=26.054, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.098, optim0_lr0=3.545e-04, train_time=1.643 +[gpub078:0/16] 2024-01-25 03:25:19,537 (trainer:737) INFO: 8epoch:train:14401-14500batch: iter_time=8.892e-05, forward_time=0.344, loss_ctc=67.853, loss_att=68.817, acc=0.684, loss=68.528, backward_time=0.423, grad_norm=32.149, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.100, optim0_lr0=3.544e-04, train_time=1.430 +[gpub078:0/16] 2024-01-25 03:27:50,621 (trainer:737) INFO: 8epoch:train:14501-14600batch: iter_time=8.837e-05, forward_time=0.331, loss_ctc=67.369, loss_att=79.393, acc=0.689, loss=75.786, backward_time=0.421, grad_norm=31.812, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.092, optim0_lr0=3.542e-04, train_time=1.511 +[gpub078:0/16] 2024-01-25 03:30:41,635 (trainer:737) INFO: 8epoch:train:14601-14700batch: 
iter_time=8.326e-05, forward_time=0.312, loss_ctc=63.033, loss_att=64.642, acc=0.698, loss=64.159, backward_time=0.428, grad_norm=28.081, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.099, optim0_lr0=3.541e-04, train_time=1.710 +[gpub078:0/16] 2024-01-25 03:33:13,087 (trainer:737) INFO: 8epoch:train:14701-14800batch: iter_time=1.414e-04, forward_time=0.378, loss_ctc=60.311, loss_att=56.914, acc=0.692, loss=57.933, backward_time=0.426, grad_norm=31.694, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.102, optim0_lr0=3.539e-04, train_time=1.514 +[gpub078:0/16] 2024-01-25 03:35:30,330 (trainer:737) INFO: 8epoch:train:14801-14900batch: iter_time=9.876e-05, forward_time=0.306, loss_ctc=57.260, loss_att=51.434, acc=0.703, loss=53.182, backward_time=0.413, grad_norm=28.882, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.093, optim0_lr0=3.538e-04, train_time=1.373 +[gpub078:0/16] 2024-01-25 03:38:09,076 (trainer:737) INFO: 8epoch:train:14901-15000batch: iter_time=1.037e-04, forward_time=0.311, loss_ctc=59.925, loss_att=60.367, acc=0.706, loss=60.234, backward_time=0.418, grad_norm=28.534, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.096, optim0_lr0=3.536e-04, train_time=1.587 +[gpub078:0/16] 2024-01-25 04:16:44,859 (trainer:343) INFO: 8epoch results: [train] iter_time=0.430, forward_time=0.379, loss_ctc=61.446, loss_att=61.842, acc=0.685, loss=61.723, backward_time=0.439, grad_norm=30.474, clip=100.000, loss_scale=1.002e+22, optim_step_time=0.100, optim0_lr0=3.653e-04, train_time=2.356, time=9 hours, 49 minutes and 36.77 seconds, total_count=150000, gpu_max_cached_mem_GB=40.762, [valid] loss_ctc=55.679, cer_ctc=0.283, loss_att=52.799, acc=0.586, cer=0.406, wer=0.999, loss=53.663, time=38 minutes and 9.16 seconds, total_count=46710, gpu_max_cached_mem_GB=40.762 +[gpub078:0/16] 2024-01-25 04:17:02,355 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub078:0/16] 2024-01-25 04:17:02,433 (trainer:445) INFO: The 
model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/3epoch.pth +[gpub078:0/16] 2024-01-25 04:17:02,434 (trainer:272) INFO: 9/45epoch started. Estimated time to finish: 2 weeks, 2 days and 3 hours +[gpub078:0/16] 2024-01-25 04:17:02,445 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub078:0/16] 2024-01-25 04:17:20,318 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 04:17:23,634 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 04:17:23,634 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub078:0/16] 2024-01-25 04:17:23,637 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 04:25:03,710 (trainer:737) INFO: 9epoch:train:1-100batch: iter_time=3.284, forward_time=0.373, loss_ctc=59.540, loss_att=54.501, acc=0.684, loss=56.013, backward_time=0.412, grad_norm=33.090, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.097, optim0_lr0=3.535e-04, train_time=4.812 +[gpub078:0/16] 2024-01-25 04:27:43,057 (trainer:737) INFO: 9epoch:train:101-200batch: iter_time=9.255e-05, forward_time=0.369, loss_ctc=59.005, loss_att=55.471, acc=0.672, loss=56.531, backward_time=0.443, grad_norm=32.600, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.102, optim0_lr0=3.533e-04, train_time=1.593 +[gpub078:0/16] 2024-01-25 
04:30:04,906 (trainer:737) INFO: 9epoch:train:201-300batch: iter_time=1.003e-04, forward_time=0.287, loss_ctc=54.760, loss_att=53.822, acc=0.661, loss=54.103, backward_time=0.402, grad_norm=31.011, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.094, optim0_lr0=3.532e-04, train_time=1.418 +[gpub078:0/16] 2024-01-25 04:32:54,617 (trainer:737) INFO: 9epoch:train:301-400batch: iter_time=1.029e-04, forward_time=0.382, loss_ctc=61.752, loss_att=61.927, acc=0.676, loss=61.874, backward_time=0.441, grad_norm=31.582, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.101, optim0_lr0=3.530e-04, train_time=1.697 +[gpub078:0/16] 2024-01-25 04:35:26,603 (trainer:737) INFO: 9epoch:train:401-500batch: iter_time=2.467e-04, forward_time=0.311, loss_ctc=55.618, loss_att=50.012, acc=0.707, loss=51.694, backward_time=0.402, grad_norm=28.972, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.094, optim0_lr0=3.529e-04, train_time=1.520 +[gpub078:0/16] 2024-01-25 04:38:13,245 (trainer:737) INFO: 9epoch:train:501-600batch: iter_time=1.049e-04, forward_time=0.375, loss_ctc=64.141, loss_att=60.389, acc=0.678, loss=61.514, backward_time=0.420, grad_norm=32.275, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.101, optim0_lr0=3.527e-04, train_time=1.665 +[gpub078:0/16] 2024-01-25 04:40:39,823 (trainer:737) INFO: 9epoch:train:601-700batch: iter_time=9.987e-05, forward_time=0.296, loss_ctc=58.381, loss_att=56.918, acc=0.661, loss=57.357, backward_time=0.403, grad_norm=31.078, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.095, optim0_lr0=3.526e-04, train_time=1.466 +[gpub078:0/16] 2024-01-25 04:43:13,060 (trainer:737) INFO: 9epoch:train:701-800batch: iter_time=6.269e-04, forward_time=0.310, loss_ctc=68.866, loss_att=56.667, acc=0.679, loss=60.326, backward_time=0.412, grad_norm=33.344, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.097, optim0_lr0=3.525e-04, train_time=1.531 +[gpub078:0/16] 2024-01-25 04:45:57,510 (trainer:737) INFO: 9epoch:train:801-900batch: 
iter_time=9.241e-05, forward_time=0.372, loss_ctc=56.151, loss_att=54.649, acc=0.679, loss=55.100, backward_time=0.421, grad_norm=29.886, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.102, optim0_lr0=3.523e-04, train_time=1.645 +[gpub078:0/16] 2024-01-25 04:48:35,102 (trainer:737) INFO: 9epoch:train:901-1000batch: iter_time=1.942e-04, forward_time=0.345, loss_ctc=61.206, loss_att=59.848, acc=0.669, loss=60.256, backward_time=0.402, grad_norm=33.220, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.094, optim0_lr0=3.522e-04, train_time=1.576 +[gpub078:0/16] 2024-01-25 04:51:13,567 (trainer:737) INFO: 9epoch:train:1001-1100batch: iter_time=9.167e-05, forward_time=0.342, loss_ctc=64.709, loss_att=53.551, acc=0.677, loss=56.899, backward_time=0.446, grad_norm=33.499, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.098, optim0_lr0=3.520e-04, train_time=1.584 +[gpub078:0/16] 2024-01-25 04:53:53,492 (trainer:737) INFO: 9epoch:train:1101-1200batch: iter_time=7.517e-04, forward_time=0.349, loss_ctc=66.426, loss_att=66.681, acc=0.670, loss=66.604, backward_time=0.412, grad_norm=33.902, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.095, optim0_lr0=3.519e-04, train_time=1.598 +[gpub078:0/16] 2024-01-25 04:55:30,277 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub078:0/16] 2024-01-25 04:55:49,016 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 04:55:52,655 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 04:55:52,655 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub078:0/16] 2024-01-25 04:55:52,658 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 05:02:21,735 (trainer:737) INFO: 9epoch:train:1201-1300batch: iter_time=3.529, forward_time=0.298, loss_ctc=53.417, loss_att=58.993, acc=0.667, loss=57.320, backward_time=0.412, grad_norm=27.582, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.094, optim0_lr0=3.517e-04, train_time=5.083 +[gpub078:0/16] 2024-01-25 05:04:53,791 (trainer:737) INFO: 9epoch:train:1301-1400batch: iter_time=9.129e-05, forward_time=0.288, loss_ctc=63.420, loss_att=59.033, acc=0.675, loss=60.349, backward_time=0.401, grad_norm=33.927, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.093, optim0_lr0=3.516e-04, train_time=1.520 +[gpub078:0/16] 2024-01-25 05:07:19,992 (trainer:737) INFO: 9epoch:train:1401-1500batch: iter_time=9.284e-05, forward_time=0.341, loss_ctc=49.896, loss_att=47.035, acc=0.685, loss=47.894, backward_time=0.427, grad_norm=27.144, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.106, optim0_lr0=3.514e-04, train_time=1.461 +[gpub078:0/16] 2024-01-25 05:10:02,051 (trainer:737) INFO: 
9epoch:train:1501-1600batch: iter_time=1.014e-04, forward_time=0.310, loss_ctc=61.557, loss_att=59.105, acc=0.678, loss=59.841, backward_time=0.418, grad_norm=34.578, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.094, optim0_lr0=3.513e-04, train_time=1.621 +[gpub078:0/16] 2024-01-25 05:12:43,589 (trainer:737) INFO: 9epoch:train:1601-1700batch: iter_time=8.843e-05, forward_time=0.293, loss_ctc=55.529, loss_att=49.111, acc=0.707, loss=51.036, backward_time=0.406, grad_norm=28.926, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.094, optim0_lr0=3.511e-04, train_time=1.615 +[gpub078:0/16] 2024-01-25 05:15:05,017 (trainer:737) INFO: 9epoch:train:1701-1800batch: iter_time=8.935e-05, forward_time=0.288, loss_ctc=62.456, loss_att=57.150, acc=0.692, loss=58.742, backward_time=0.401, grad_norm=31.292, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.093, optim0_lr0=3.510e-04, train_time=1.414 +[gpub078:0/16] 2024-01-25 05:17:40,631 (trainer:737) INFO: 9epoch:train:1801-1900batch: iter_time=8.920e-05, forward_time=0.335, loss_ctc=57.350, loss_att=59.875, acc=0.671, loss=59.117, backward_time=0.435, grad_norm=29.049, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.097, optim0_lr0=3.509e-04, train_time=1.555 +[gpub078:0/16] 2024-01-25 05:20:19,028 (trainer:737) INFO: 9epoch:train:1901-2000batch: iter_time=8.694e-05, forward_time=0.318, loss_ctc=62.641, loss_att=57.904, acc=0.663, loss=59.325, backward_time=0.415, grad_norm=32.176, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.095, optim0_lr0=3.507e-04, train_time=1.584 +[gpub078:0/16] 2024-01-25 05:22:45,502 (trainer:737) INFO: 9epoch:train:2001-2100batch: iter_time=8.912e-05, forward_time=0.302, loss_ctc=61.589, loss_att=48.002, acc=0.703, loss=52.078, backward_time=0.402, grad_norm=31.476, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.094, optim0_lr0=3.506e-04, train_time=1.464 +[gpub078:0/16] 2024-01-25 05:25:06,060 (trainer:737) INFO: 9epoch:train:2101-2200batch: iter_time=9.821e-05, 
forward_time=0.289, loss_ctc=59.195, loss_att=58.164, acc=0.677, loss=58.473, backward_time=0.401, grad_norm=31.306, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.093, optim0_lr0=3.504e-04, train_time=1.405 +[gpub078:0/16] 2024-01-25 05:27:47,132 (trainer:737) INFO: 9epoch:train:2201-2300batch: iter_time=9.426e-05, forward_time=0.355, loss_ctc=61.097, loss_att=59.837, acc=0.655, loss=60.215, backward_time=0.425, grad_norm=33.082, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.099, optim0_lr0=3.503e-04, train_time=1.610 +[gpub078:0/16] 2024-01-25 05:30:20,514 (trainer:737) INFO: 9epoch:train:2301-2400batch: iter_time=8.975e-05, forward_time=0.318, loss_ctc=57.810, loss_att=53.481, acc=0.710, loss=54.780, backward_time=0.414, grad_norm=28.770, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.097, optim0_lr0=3.501e-04, train_time=1.534 +[gpub078:0/16] 2024-01-25 05:32:55,568 (trainer:737) INFO: 9epoch:train:2401-2500batch: iter_time=1.024e-04, forward_time=0.294, loss_ctc=65.875, loss_att=70.161, acc=0.648, loss=68.875, backward_time=0.415, grad_norm=33.568, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.094, optim0_lr0=3.500e-04, train_time=1.550 +[gpub078:0/16] 2024-01-25 05:33:15,749 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub078:0/16] 2024-01-25 05:33:34,259 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 05:33:38,084 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 05:33:38,084 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub078:0/16] 2024-01-25 05:33:38,087 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 05:41:22,060 (trainer:737) INFO: 9epoch:train:2501-2600batch: iter_time=3.544, forward_time=0.310, loss_ctc=56.410, loss_att=53.982, acc=0.705, loss=54.710, backward_time=0.423, grad_norm=29.857, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.097, optim0_lr0=3.499e-04, train_time=5.065 +[gpub078:0/16] 2024-01-25 05:43:46,744 (trainer:737) INFO: 9epoch:train:2601-2700batch: iter_time=8.820e-05, forward_time=0.320, loss_ctc=55.353, loss_att=53.945, acc=0.681, loss=54.367, backward_time=0.419, grad_norm=30.859, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.099, optim0_lr0=3.497e-04, train_time=1.446 +[gpub078:0/16] 2024-01-25 05:46:31,959 (trainer:737) INFO: 9epoch:train:2701-2800batch: iter_time=8.713e-05, forward_time=0.334, loss_ctc=52.599, loss_att=54.481, acc=0.671, loss=53.917, backward_time=0.409, grad_norm=28.744, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.095, optim0_lr0=3.496e-04, train_time=1.652 +[gpub078:0/16] 2024-01-25 05:49:10,313 (trainer:737) INFO: 
9epoch:train:2801-2900batch: iter_time=9.402e-05, forward_time=0.290, loss_ctc=59.565, loss_att=60.900, acc=0.687, loss=60.500, backward_time=0.402, grad_norm=29.283, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.093, optim0_lr0=3.494e-04, train_time=1.583 +[gpub078:0/16] 2024-01-25 05:51:35,879 (trainer:737) INFO: 9epoch:train:2901-3000batch: iter_time=8.970e-05, forward_time=0.334, loss_ctc=54.097, loss_att=51.993, acc=0.709, loss=52.625, backward_time=0.409, grad_norm=28.436, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.096, optim0_lr0=3.493e-04, train_time=1.456 +[gpub078:0/16] 2024-01-25 05:54:36,079 (trainer:737) INFO: 9epoch:train:3001-3100batch: iter_time=8.920e-05, forward_time=0.314, loss_ctc=61.978, loss_att=61.376, acc=0.685, loss=61.557, backward_time=0.447, grad_norm=31.657, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.097, optim0_lr0=3.491e-04, train_time=1.801 +[gpub078:0/16] 2024-01-25 05:57:07,983 (trainer:737) INFO: 9epoch:train:3101-3200batch: iter_time=9.091e-05, forward_time=0.316, loss_ctc=56.404, loss_att=58.187, acc=0.675, loss=57.652, backward_time=0.424, grad_norm=30.671, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.096, optim0_lr0=3.490e-04, train_time=1.518 +[gpub078:0/16] 2024-01-25 05:59:35,628 (trainer:737) INFO: 9epoch:train:3201-3300batch: iter_time=9.890e-05, forward_time=0.292, loss_ctc=65.147, loss_att=56.550, acc=0.697, loss=59.129, backward_time=0.402, grad_norm=31.757, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.093, optim0_lr0=3.489e-04, train_time=1.477 +[gpub078:0/16] 2024-01-25 06:02:29,588 (trainer:737) INFO: 9epoch:train:3301-3400batch: iter_time=1.079e-04, forward_time=0.311, loss_ctc=54.830, loss_att=53.576, acc=0.694, loss=53.952, backward_time=0.414, grad_norm=33.398, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.094, optim0_lr0=3.487e-04, train_time=1.740 +[gpub078:0/16] 2024-01-25 06:04:58,315 (trainer:737) INFO: 9epoch:train:3401-3500batch: iter_time=9.897e-05, 
forward_time=0.332, loss_ctc=58.909, loss_att=56.927, acc=0.688, loss=57.521, backward_time=0.424, grad_norm=31.943, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.098, optim0_lr0=3.486e-04, train_time=1.486 +[gpub078:0/16] 2024-01-25 06:07:53,515 (trainer:737) INFO: 9epoch:train:3501-3600batch: iter_time=9.266e-05, forward_time=0.309, loss_ctc=60.721, loss_att=52.786, acc=0.690, loss=55.166, backward_time=0.445, grad_norm=48.163, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.098, optim0_lr0=3.484e-04, train_time=1.751 +[gpub078:0/16] 2024-01-25 06:10:17,720 (trainer:737) INFO: 9epoch:train:3601-3700batch: iter_time=9.478e-05, forward_time=0.293, loss_ctc=64.251, loss_att=66.144, acc=0.685, loss=65.576, backward_time=0.407, grad_norm=31.757, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.093, optim0_lr0=3.483e-04, train_time=1.443 +[gpub078:0/16] 2024-01-25 06:11:54,072 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub078:0/16] 2024-01-25 06:12:12,892 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 06:12:16,491 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 06:12:16,491 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub078:0/16] 2024-01-25 06:12:16,494 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 06:18:49,582 (trainer:737) INFO: 
9epoch:train:3701-3800batch: iter_time=3.559, forward_time=0.349, loss_ctc=52.426, loss_att=59.834, acc=0.680, loss=57.612, backward_time=0.411, grad_norm=26.897, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.094, optim0_lr0=3.482e-04, train_time=5.119 +[gpub078:0/16] 2024-01-25 06:21:14,919 (trainer:737) INFO: 9epoch:train:3801-3900batch: iter_time=7.853e-05, forward_time=0.307, loss_ctc=62.954, loss_att=59.195, acc=0.687, loss=60.323, backward_time=0.408, grad_norm=32.846, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.095, optim0_lr0=3.480e-04, train_time=1.453 +[gpub078:0/16] 2024-01-25 06:23:44,466 (trainer:737) INFO: 9epoch:train:3901-4000batch: iter_time=8.467e-05, forward_time=0.307, loss_ctc=49.089, loss_att=46.298, acc=0.697, loss=47.135, backward_time=0.412, grad_norm=25.726, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.094, optim0_lr0=3.479e-04, train_time=1.494 +[gpub078:0/16] 2024-01-25 06:26:27,197 (trainer:737) INFO: 9epoch:train:4001-4100batch: iter_time=9.564e-05, forward_time=0.289, loss_ctc=59.873, loss_att=60.570, acc=0.683, loss=60.361, backward_time=0.401, grad_norm=31.724, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.477e-04, train_time=1.628 +[gpub078:0/16] 2024-01-25 06:29:09,660 (trainer:737) INFO: 9epoch:train:4101-4200batch: iter_time=9.721e-05, forward_time=0.335, loss_ctc=54.122, loss_att=49.700, acc=0.708, loss=51.026, backward_time=0.450, grad_norm=27.359, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.098, optim0_lr0=3.476e-04, train_time=1.624 +[gpub078:0/16] 2024-01-25 06:31:37,093 (trainer:737) INFO: 9epoch:train:4201-4300batch: iter_time=8.924e-05, forward_time=0.295, loss_ctc=60.628, loss_att=59.329, acc=0.694, loss=59.719, backward_time=0.410, grad_norm=28.586, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.094, optim0_lr0=3.475e-04, train_time=1.474 +[gpub078:0/16] 2024-01-25 06:34:38,793 (trainer:737) INFO: 9epoch:train:4301-4400batch: iter_time=8.784e-05, 
forward_time=0.315, loss_ctc=56.065, loss_att=59.942, acc=0.687, loss=58.779, backward_time=0.426, grad_norm=29.100, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.095, optim0_lr0=3.473e-04, train_time=1.817 +[gpub078:0/16] 2024-01-25 06:36:56,767 (trainer:737) INFO: 9epoch:train:4401-4500batch: iter_time=8.555e-05, forward_time=0.288, loss_ctc=61.403, loss_att=58.853, acc=0.678, loss=59.618, backward_time=0.402, grad_norm=30.535, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.472e-04, train_time=1.377 +[gpub078:0/16] 2024-01-25 06:39:36,916 (trainer:737) INFO: 9epoch:train:4501-4600batch: iter_time=8.295e-05, forward_time=0.377, loss_ctc=59.081, loss_att=49.538, acc=0.708, loss=52.401, backward_time=0.426, grad_norm=29.866, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.096, optim0_lr0=3.470e-04, train_time=1.603 +[gpub078:0/16] 2024-01-25 06:42:22,272 (trainer:737) INFO: 9epoch:train:4601-4700batch: iter_time=9.822e-05, forward_time=0.292, loss_ctc=57.820, loss_att=56.568, acc=0.691, loss=56.943, backward_time=0.412, grad_norm=29.059, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.094, optim0_lr0=3.469e-04, train_time=1.653 +[gpub078:0/16] 2024-01-25 06:44:53,529 (trainer:737) INFO: 9epoch:train:4701-4800batch: iter_time=9.653e-05, forward_time=0.314, loss_ctc=59.238, loss_att=59.146, acc=0.670, loss=59.174, backward_time=0.412, grad_norm=31.450, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.096, optim0_lr0=3.468e-04, train_time=1.512 +[gpub078:0/16] 2024-01-25 06:47:24,344 (trainer:737) INFO: 9epoch:train:4801-4900batch: iter_time=9.248e-05, forward_time=0.295, loss_ctc=56.783, loss_att=52.678, acc=0.726, loss=53.909, backward_time=0.404, grad_norm=27.603, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.466e-04, train_time=1.508 +[gpub078:0/16] 2024-01-25 06:50:28,642 (trainer:737) INFO: 9epoch:train:4901-5000batch: iter_time=1.020e-04, forward_time=0.347, loss_ctc=65.423, 
loss_att=72.750, acc=0.659, loss=70.552, backward_time=0.457, grad_norm=34.871, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.102, optim0_lr0=3.465e-04, train_time=1.841 +[gpub078:0/16] 2024-01-25 06:50:48,745 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub078:0/16] 2024-01-25 06:51:07,553 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 06:51:11,121 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 06:51:11,121 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub078:0/16] 2024-01-25 06:51:11,125 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 06:58:48,024 (trainer:737) INFO: 9epoch:train:5001-5100batch: iter_time=3.552, forward_time=0.320, loss_ctc=55.574, loss_att=52.878, acc=0.694, loss=53.687, backward_time=0.406, grad_norm=29.521, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.095, optim0_lr0=3.463e-04, train_time=4.995 +[gpub078:0/16] 2024-01-25 07:01:15,201 (trainer:737) INFO: 9epoch:train:5101-5200batch: iter_time=7.973e-05, forward_time=0.292, loss_ctc=54.804, loss_att=52.383, acc=0.683, loss=53.109, backward_time=0.400, grad_norm=30.085, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.462e-04, train_time=1.472 +[gpub078:0/16] 2024-01-25 07:03:59,502 (trainer:737) INFO: 9epoch:train:5201-5300batch: 
iter_time=9.243e-05, forward_time=0.286, loss_ctc=51.425, loss_att=52.654, acc=0.667, loss=52.285, backward_time=0.397, grad_norm=30.294, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.461e-04, train_time=1.643 +[gpub078:0/16] 2024-01-25 07:06:38,585 (trainer:737) INFO: 9epoch:train:5301-5400batch: iter_time=6.859e-04, forward_time=0.435, loss_ctc=58.313, loss_att=58.866, acc=0.690, loss=58.700, backward_time=0.423, grad_norm=30.139, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.102, optim0_lr0=3.459e-04, train_time=1.589 +[gpub078:0/16] 2024-01-25 07:09:01,086 (trainer:737) INFO: 9epoch:train:5401-5500batch: iter_time=9.500e-05, forward_time=0.295, loss_ctc=53.114, loss_att=48.565, acc=0.717, loss=49.930, backward_time=0.410, grad_norm=28.123, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.458e-04, train_time=1.427 +[gpub078:0/16] 2024-01-25 07:11:50,390 (trainer:737) INFO: 9epoch:train:5501-5600batch: iter_time=9.089e-05, forward_time=0.291, loss_ctc=60.083, loss_att=58.062, acc=0.690, loss=58.668, backward_time=0.404, grad_norm=27.781, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.456e-04, train_time=1.693 +[gpub078:0/16] 2024-01-25 07:14:36,266 (trainer:737) INFO: 9epoch:train:5601-5700batch: iter_time=9.102e-05, forward_time=0.388, loss_ctc=55.847, loss_att=55.396, acc=0.674, loss=55.531, backward_time=0.431, grad_norm=29.605, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.123, optim0_lr0=3.455e-04, train_time=1.658 +[gpub078:0/16] 2024-01-25 07:17:06,128 (trainer:737) INFO: 9epoch:train:5701-5800batch: iter_time=8.549e-05, forward_time=0.315, loss_ctc=64.222, loss_att=54.231, acc=0.689, loss=57.228, backward_time=0.405, grad_norm=33.498, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.454e-04, train_time=1.497 +[gpub078:0/16] 2024-01-25 07:19:25,634 (trainer:737) INFO: 9epoch:train:5801-5900batch: iter_time=8.067e-05, forward_time=0.293, 
loss_ctc=53.268, loss_att=51.851, acc=0.692, loss=52.276, backward_time=0.404, grad_norm=29.280, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.452e-04, train_time=1.396 +[gpub078:0/16] 2024-01-25 07:22:04,603 (trainer:737) INFO: 9epoch:train:5901-6000batch: iter_time=9.022e-05, forward_time=0.290, loss_ctc=58.856, loss_att=57.611, acc=0.682, loss=57.985, backward_time=0.402, grad_norm=33.590, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.093, optim0_lr0=3.451e-04, train_time=1.589 +[gpub078:0/16] 2024-01-25 07:24:40,723 (trainer:737) INFO: 9epoch:train:6001-6100batch: iter_time=8.056e-05, forward_time=0.372, loss_ctc=59.550, loss_att=51.507, acc=0.687, loss=53.920, backward_time=0.427, grad_norm=31.001, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.101, optim0_lr0=3.450e-04, train_time=1.561 +[gpub078:0/16] 2024-01-25 07:27:07,481 (trainer:737) INFO: 9epoch:train:6101-6200batch: iter_time=9.125e-05, forward_time=0.307, loss_ctc=63.535, loss_att=64.133, acc=0.680, loss=63.954, backward_time=0.417, grad_norm=34.189, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.448e-04, train_time=1.467 +[gpub078:0/16] 2024-01-25 07:29:09,607 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub078:0/16] 2024-01-25 07:29:28,441 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 07:29:31,994 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 07:29:31,995 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub078:0/16] 2024-01-25 07:29:31,998 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 07:36:51,410 (trainer:737) INFO: 9epoch:train:6201-6300batch: iter_time=4.016, forward_time=0.292, loss_ctc=52.397, loss_att=59.314, acc=0.677, loss=57.239, backward_time=0.401, grad_norm=29.134, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.447e-04, train_time=5.839 +[gpub078:0/16] 2024-01-25 07:39:22,118 (trainer:737) INFO: 9epoch:train:6301-6400batch: iter_time=9.030e-05, forward_time=0.363, loss_ctc=60.903, loss_att=58.552, acc=0.689, loss=59.258, backward_time=0.442, grad_norm=32.438, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.103, optim0_lr0=3.446e-04, train_time=1.507 +[gpub078:0/16] 2024-01-25 07:42:18,883 (trainer:737) INFO: 9epoch:train:6401-6500batch: iter_time=9.034e-05, forward_time=0.302, loss_ctc=48.304, loss_att=45.989, acc=0.701, loss=46.684, backward_time=0.406, grad_norm=27.836, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.094, optim0_lr0=3.444e-04, train_time=1.767 +[gpub078:0/16] 2024-01-25 07:44:37,152 (trainer:737) INFO: 
9epoch:train:6501-6600batch: iter_time=9.243e-05, forward_time=0.296, loss_ctc=58.945, loss_att=60.052, acc=0.688, loss=59.720, backward_time=0.406, grad_norm=30.464, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.095, optim0_lr0=3.443e-04, train_time=1.382 +[gpub078:0/16] 2024-01-25 07:47:29,677 (trainer:737) INFO: 9epoch:train:6601-6700batch: iter_time=1.857e-04, forward_time=0.437, loss_ctc=53.644, loss_att=48.697, acc=0.711, loss=50.181, backward_time=0.415, grad_norm=26.605, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.099, optim0_lr0=3.441e-04, train_time=1.726 +[gpub078:0/16] 2024-01-25 07:49:59,196 (trainer:737) INFO: 9epoch:train:6701-6800batch: iter_time=1.005e-04, forward_time=0.311, loss_ctc=59.800, loss_att=58.200, acc=0.698, loss=58.680, backward_time=0.406, grad_norm=29.499, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.094, optim0_lr0=3.440e-04, train_time=1.495 +[gpub078:0/16] 2024-01-25 07:52:49,543 (trainer:737) INFO: 9epoch:train:6801-6900batch: iter_time=9.041e-05, forward_time=0.296, loss_ctc=55.968, loss_att=60.875, acc=0.685, loss=59.403, backward_time=0.406, grad_norm=28.881, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.094, optim0_lr0=3.439e-04, train_time=1.703 +[gpub078:0/16] 2024-01-25 07:55:23,751 (trainer:737) INFO: 9epoch:train:6901-7000batch: iter_time=9.819e-05, forward_time=0.442, loss_ctc=60.561, loss_att=58.217, acc=0.681, loss=58.920, backward_time=0.419, grad_norm=29.750, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.097, optim0_lr0=3.437e-04, train_time=1.543 +[gpub078:0/16] 2024-01-25 07:58:13,687 (trainer:737) INFO: 9epoch:train:7001-7100batch: iter_time=9.000e-05, forward_time=0.312, loss_ctc=58.145, loss_att=48.770, acc=0.710, loss=51.582, backward_time=0.408, grad_norm=29.698, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.436e-04, train_time=1.699 +[gpub078:0/16] 2024-01-25 08:00:42,911 (trainer:737) INFO: 9epoch:train:7101-7200batch: iter_time=1.008e-04, 
forward_time=0.292, loss_ctc=57.323, loss_att=57.149, acc=0.690, loss=57.201, backward_time=0.406, grad_norm=29.957, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.435e-04, train_time=1.492 +[gpub078:0/16] 2024-01-25 08:03:08,577 (trainer:737) INFO: 9epoch:train:7201-7300batch: iter_time=9.678e-05, forward_time=0.288, loss_ctc=58.025, loss_att=59.428, acc=0.667, loss=59.007, backward_time=0.400, grad_norm=30.398, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.433e-04, train_time=1.456 +[gpub078:0/16] 2024-01-25 08:05:56,796 (trainer:737) INFO: 9epoch:train:7301-7400batch: iter_time=7.855e-04, forward_time=0.385, loss_ctc=56.342, loss_att=51.666, acc=0.732, loss=53.069, backward_time=0.435, grad_norm=26.421, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.105, optim0_lr0=3.432e-04, train_time=1.683 +[gpub078:0/16] 2024-01-25 08:08:30,315 (trainer:737) INFO: 9epoch:train:7401-7500batch: iter_time=9.247e-05, forward_time=0.292, loss_ctc=65.134, loss_att=72.387, acc=0.661, loss=70.211, backward_time=0.405, grad_norm=35.113, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.431e-04, train_time=1.535 +[gpub078:0/16] 2024-01-25 08:08:50,343 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub078:0/16] 2024-01-25 08:09:09,415 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 08:09:13,089 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 08:09:13,089 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub078:0/16] 2024-01-25 08:09:13,092 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 08:16:48,904 (trainer:737) INFO: 9epoch:train:7501-7600batch: iter_time=3.533, forward_time=0.304, loss_ctc=55.867, loss_att=50.522, acc=0.715, loss=52.125, backward_time=0.404, grad_norm=29.311, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.429e-04, train_time=4.986 +[gpub078:0/16] 2024-01-25 08:19:29,513 (trainer:737) INFO: 9epoch:train:7601-7700batch: iter_time=9.815e-05, forward_time=0.359, loss_ctc=54.489, loss_att=52.500, acc=0.687, loss=53.097, backward_time=0.458, grad_norm=29.702, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.099, optim0_lr0=3.428e-04, train_time=1.606 +[gpub078:0/16] 2024-01-25 08:22:07,875 (trainer:737) INFO: 9epoch:train:7701-7800batch: iter_time=9.448e-05, forward_time=0.310, loss_ctc=51.120, loss_att=52.516, acc=0.682, loss=52.097, backward_time=0.402, grad_norm=27.800, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.094, optim0_lr0=3.427e-04, train_time=1.584 +[gpub078:0/16] 2024-01-25 08:24:54,551 (trainer:737) INFO: 
9epoch:train:7801-7900batch: iter_time=9.512e-05, forward_time=0.296, loss_ctc=57.338, loss_att=58.736, acc=0.697, loss=58.316, backward_time=0.405, grad_norm=29.190, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.094, optim0_lr0=3.425e-04, train_time=1.666 +[gpub078:0/16] 2024-01-25 08:27:21,336 (trainer:737) INFO: 9epoch:train:7901-8000batch: iter_time=1.038e-04, forward_time=0.288, loss_ctc=52.796, loss_att=50.168, acc=0.718, loss=50.957, backward_time=0.401, grad_norm=27.586, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.093, optim0_lr0=3.424e-04, train_time=1.468 +[gpub078:0/16] 2024-01-25 08:30:14,223 (trainer:737) INFO: 9epoch:train:8001-8100batch: iter_time=9.110e-05, forward_time=0.412, loss_ctc=58.918, loss_att=59.714, acc=0.692, loss=59.475, backward_time=0.435, grad_norm=28.761, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.102, optim0_lr0=3.423e-04, train_time=1.728 +[gpub078:0/16] 2024-01-25 08:32:47,495 (trainer:737) INFO: 9epoch:train:8101-8200batch: iter_time=9.009e-05, forward_time=0.289, loss_ctc=55.440, loss_att=56.430, acc=0.684, loss=56.133, backward_time=0.401, grad_norm=30.503, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.421e-04, train_time=1.533 +[gpub078:0/16] 2024-01-25 08:35:18,211 (trainer:737) INFO: 9epoch:train:8201-8300batch: iter_time=9.655e-05, forward_time=0.295, loss_ctc=63.674, loss_att=54.506, acc=0.705, loss=57.256, backward_time=0.407, grad_norm=30.085, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.094, optim0_lr0=3.420e-04, train_time=1.506 +[gpub078:0/16] 2024-01-25 08:37:54,935 (trainer:737) INFO: 9epoch:train:8301-8400batch: iter_time=1.012e-04, forward_time=0.288, loss_ctc=53.127, loss_att=52.840, acc=0.698, loss=52.926, backward_time=0.400, grad_norm=27.930, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.419e-04, train_time=1.567 +[gpub078:0/16] 2024-01-25 08:40:38,872 (trainer:737) INFO: 9epoch:train:8401-8500batch: iter_time=9.835e-05, 
forward_time=0.370, loss_ctc=57.331, loss_att=56.834, acc=0.690, loss=56.983, backward_time=0.446, grad_norm=29.509, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.104, optim0_lr0=3.417e-04, train_time=1.639 +[gpub078:0/16] 2024-01-25 08:43:25,879 (trainer:737) INFO: 9epoch:train:8501-8600batch: iter_time=9.299e-05, forward_time=0.288, loss_ctc=58.336, loss_att=51.286, acc=0.699, loss=53.401, backward_time=0.399, grad_norm=30.280, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.416e-04, train_time=1.671 +[gpub078:0/16] 2024-01-25 08:45:56,030 (trainer:737) INFO: 9epoch:train:8601-8700batch: iter_time=1.099e-04, forward_time=0.296, loss_ctc=63.325, loss_att=63.844, acc=0.696, loss=63.688, backward_time=0.413, grad_norm=32.539, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.094, optim0_lr0=3.415e-04, train_time=1.501 +[gpub078:0/16] 2024-01-25 08:47:32,082 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub078:0/16] 2024-01-25 08:47:51,405 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 08:47:55,012 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 08:47:55,013 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub078:0/16] 2024-01-25 08:47:55,016 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 08:54:37,109 (trainer:737) INFO: 
9epoch:train:8701-8800batch: iter_time=3.663, forward_time=0.395, loss_ctc=51.234, loss_att=60.741, acc=0.677, loss=57.889, backward_time=0.409, grad_norm=28.071, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.095, optim0_lr0=3.413e-04, train_time=5.210 +[gpub078:0/16] 2024-01-25 08:56:56,453 (trainer:737) INFO: 9epoch:train:8801-8900batch: iter_time=9.458e-05, forward_time=0.289, loss_ctc=59.989, loss_att=58.662, acc=0.681, loss=59.060, backward_time=0.403, grad_norm=33.972, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.094, optim0_lr0=3.412e-04, train_time=1.394 +[gpub078:0/16] 2024-01-25 08:59:43,610 (trainer:737) INFO: 9epoch:train:8901-9000batch: iter_time=9.364e-05, forward_time=0.294, loss_ctc=47.967, loss_att=46.540, acc=0.695, loss=46.968, backward_time=0.405, grad_norm=26.939, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.094, optim0_lr0=3.411e-04, train_time=1.671 +[gpub078:0/16] 2024-01-25 09:02:22,208 (trainer:737) INFO: 9epoch:train:9001-9100batch: iter_time=1.036e-04, forward_time=0.313, loss_ctc=57.761, loss_att=58.091, acc=0.686, loss=57.992, backward_time=0.405, grad_norm=31.757, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.099, optim0_lr0=3.409e-04, train_time=1.586 +[gpub078:0/16] 2024-01-25 09:04:54,677 (trainer:737) INFO: 9epoch:train:9101-9200batch: iter_time=1.019e-04, forward_time=0.377, loss_ctc=53.565, loss_att=48.506, acc=0.714, loss=50.024, backward_time=0.428, grad_norm=28.040, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.102, optim0_lr0=3.408e-04, train_time=1.524 +[gpub078:0/16] 2024-01-25 09:07:49,822 (trainer:737) INFO: 9epoch:train:9201-9300batch: iter_time=1.034e-04, forward_time=0.293, loss_ctc=58.735, loss_att=55.932, acc=0.701, loss=56.773, backward_time=0.406, grad_norm=29.157, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.407e-04, train_time=1.752 +[gpub078:0/16] 2024-01-25 09:10:20,185 (trainer:737) INFO: 9epoch:train:9301-9400batch: iter_time=9.511e-05, 
forward_time=0.295, loss_ctc=55.248, loss_att=58.771, acc=0.681, loss=57.714, backward_time=0.402, grad_norm=29.515, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.094, optim0_lr0=3.405e-04, train_time=1.503 +[gpub078:0/16] 2024-01-25 09:13:20,284 (trainer:737) INFO: 9epoch:train:9401-9500batch: iter_time=9.523e-05, forward_time=0.342, loss_ctc=59.189, loss_att=55.995, acc=0.672, loss=56.953, backward_time=0.475, grad_norm=29.006, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.101, optim0_lr0=3.404e-04, train_time=1.801 +[gpub078:0/16] 2024-01-25 09:15:43,267 (trainer:737) INFO: 9epoch:train:9501-9600batch: iter_time=8.492e-05, forward_time=0.288, loss_ctc=56.312, loss_att=46.468, acc=0.713, loss=49.421, backward_time=0.401, grad_norm=27.981, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.403e-04, train_time=1.429 +[gpub078:0/16] 2024-01-25 09:18:19,872 (trainer:737) INFO: 9epoch:train:9601-9700batch: iter_time=9.191e-05, forward_time=0.298, loss_ctc=57.082, loss_att=56.971, acc=0.686, loss=57.004, backward_time=0.406, grad_norm=29.876, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.401e-04, train_time=1.566 +[gpub078:0/16] 2024-01-25 09:20:51,760 (trainer:737) INFO: 9epoch:train:9701-9800batch: iter_time=9.288e-05, forward_time=0.288, loss_ctc=58.290, loss_att=58.109, acc=0.666, loss=58.163, backward_time=0.402, grad_norm=32.255, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.400e-04, train_time=1.519 +[gpub078:0/16] 2024-01-25 09:23:55,841 (trainer:737) INFO: 9epoch:train:9801-9900batch: iter_time=0.001, forward_time=0.385, loss_ctc=55.443, loss_att=52.497, acc=0.717, loss=53.381, backward_time=0.432, grad_norm=27.596, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.104, optim0_lr0=3.399e-04, train_time=1.839 +[gpub078:0/16] 2024-01-25 09:26:16,073 (trainer:737) INFO: 9epoch:train:9901-10000batch: iter_time=9.743e-05, forward_time=0.291, loss_ctc=64.281, 
loss_att=69.744, acc=0.655, loss=68.106, backward_time=0.404, grad_norm=32.296, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.093, optim0_lr0=3.397e-04, train_time=1.403 +[gpub078:0/16] 2024-01-25 09:26:36,373 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub078:0/16] 2024-01-25 09:26:55,276 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 09:26:59,114 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 09:26:59,114 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub078:0/16] 2024-01-25 09:26:59,117 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 09:35:11,427 (trainer:737) INFO: 9epoch:train:10001-10100batch: iter_time=3.516, forward_time=0.299, loss_ctc=54.895, loss_att=51.114, acc=0.717, loss=52.248, backward_time=0.402, grad_norm=29.228, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.093, optim0_lr0=3.396e-04, train_time=5.354 +[gpub078:0/16] 2024-01-25 09:38:17,380 (trainer:737) INFO: 9epoch:train:10101-10200batch: iter_time=9.036e-05, forward_time=0.402, loss_ctc=53.419, loss_att=51.897, acc=0.691, loss=52.353, backward_time=0.454, grad_norm=30.228, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.110, optim0_lr0=3.395e-04, train_time=1.859 +[gpub078:0/16] 2024-01-25 09:41:01,951 (trainer:737) INFO: 9epoch:train:10201-10300batch: 
iter_time=9.266e-05, forward_time=0.286, loss_ctc=50.236, loss_att=52.560, acc=0.683, loss=51.863, backward_time=0.398, grad_norm=28.575, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.094, optim0_lr0=3.394e-04, train_time=1.645 +[gpub078:0/16] 2024-01-25 09:43:42,443 (trainer:737) INFO: 9epoch:train:10301-10400batch: iter_time=9.666e-05, forward_time=0.297, loss_ctc=57.287, loss_att=58.842, acc=0.697, loss=58.376, backward_time=0.407, grad_norm=29.781, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.094, optim0_lr0=3.392e-04, train_time=1.605 +[gpub078:0/16] 2024-01-25 09:46:01,701 (trainer:737) INFO: 9epoch:train:10401-10500batch: iter_time=8.632e-05, forward_time=0.318, loss_ctc=52.574, loss_att=50.178, acc=0.719, loss=50.897, backward_time=0.401, grad_norm=27.266, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.102, optim0_lr0=3.391e-04, train_time=1.393 +[gpub078:0/16] 2024-01-25 09:49:17,034 (trainer:737) INFO: 9epoch:train:10501-10600batch: iter_time=1.000e-04, forward_time=0.402, loss_ctc=59.107, loss_att=58.614, acc=0.695, loss=58.762, backward_time=0.418, grad_norm=29.523, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.095, optim0_lr0=3.390e-04, train_time=1.953 +[gpub078:0/16] 2024-01-25 09:51:48,583 (trainer:737) INFO: 9epoch:train:10601-10700batch: iter_time=9.121e-05, forward_time=0.294, loss_ctc=54.220, loss_att=55.691, acc=0.687, loss=55.250, backward_time=0.406, grad_norm=29.257, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.094, optim0_lr0=3.388e-04, train_time=1.514 +[gpub078:0/16] 2024-01-25 09:54:01,691 (trainer:737) INFO: 9epoch:train:10701-10800batch: iter_time=9.352e-05, forward_time=0.292, loss_ctc=62.157, loss_att=54.379, acc=0.707, loss=56.713, backward_time=0.405, grad_norm=29.567, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.094, optim0_lr0=3.387e-04, train_time=1.332 +[gpub078:0/16] 2024-01-25 09:57:16,626 (trainer:737) INFO: 9epoch:train:10801-10900batch: iter_time=5.044e-04, 
forward_time=0.381, loss_ctc=51.617, loss_att=51.606, acc=0.704, loss=51.610, backward_time=0.436, grad_norm=27.720, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.106, optim0_lr0=3.386e-04, train_time=1.950 +[gpub078:0/16] 2024-01-25 09:59:50,469 (trainer:737) INFO: 9epoch:train:10901-11000batch: iter_time=8.967e-05, forward_time=0.291, loss_ctc=57.103, loss_att=55.539, acc=0.697, loss=56.008, backward_time=0.409, grad_norm=31.074, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.094, optim0_lr0=3.384e-04, train_time=1.538 +[gpub078:0/16] 2024-01-25 10:02:30,884 (trainer:737) INFO: 9epoch:train:11001-11100batch: iter_time=9.167e-05, forward_time=0.291, loss_ctc=58.106, loss_att=50.797, acc=0.701, loss=52.990, backward_time=0.400, grad_norm=30.815, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.093, optim0_lr0=3.383e-04, train_time=1.602 +[gpub078:0/16] 2024-01-25 10:05:15,914 (trainer:737) INFO: 9epoch:train:11101-11200batch: iter_time=0.001, forward_time=0.363, loss_ctc=62.652, loss_att=63.654, acc=0.698, loss=63.353, backward_time=0.443, grad_norm=30.691, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.101, optim0_lr0=3.382e-04, train_time=1.652 +[gpub078:0/16] 2024-01-25 10:06:49,318 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub078:0/16] 2024-01-25 10:07:08,499 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 10:07:11,989 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 10:07:11,989 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub078:0/16] 2024-01-25 10:07:11,993 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 10:14:10,831 (trainer:737) INFO: 9epoch:train:11201-11300batch: iter_time=3.729, forward_time=0.295, loss_ctc=50.740, loss_att=58.832, acc=0.687, loss=56.404, backward_time=0.403, grad_norm=27.059, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.094, optim0_lr0=3.381e-04, train_time=5.349 +[gpub078:0/16] 2024-01-25 10:16:39,767 (trainer:737) INFO: 9epoch:train:11301-11400batch: iter_time=8.083e-05, forward_time=0.369, loss_ctc=59.512, loss_att=56.295, acc=0.698, loss=57.260, backward_time=0.420, grad_norm=32.421, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.111, optim0_lr0=3.379e-04, train_time=1.489 +[gpub078:0/16] 2024-01-25 10:19:32,015 (trainer:737) INFO: 9epoch:train:11401-11500batch: iter_time=8.274e-05, forward_time=0.338, loss_ctc=47.187, loss_att=45.014, acc=0.705, loss=45.666, backward_time=0.404, grad_norm=25.910, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.094, optim0_lr0=3.378e-04, train_time=1.722 +[gpub078:0/16] 2024-01-25 10:22:02,271 (trainer:737) 
INFO: 9epoch:train:11501-11600batch: iter_time=2.416e-04, forward_time=0.359, loss_ctc=57.435, loss_att=58.194, acc=0.695, loss=57.966, backward_time=0.415, grad_norm=30.598, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.111, optim0_lr0=3.377e-04, train_time=1.503 +[gpub078:0/16] 2024-01-25 10:25:09,172 (trainer:737) INFO: 9epoch:train:11601-11700batch: iter_time=8.059e-05, forward_time=0.287, loss_ctc=52.553, loss_att=48.165, acc=0.716, loss=49.481, backward_time=0.398, grad_norm=27.122, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.093, optim0_lr0=3.375e-04, train_time=1.869 +[gpub078:0/16] 2024-01-25 10:27:33,983 (trainer:737) INFO: 9epoch:train:11701-11800batch: iter_time=8.470e-05, forward_time=0.340, loss_ctc=58.152, loss_att=57.493, acc=0.703, loss=57.690, backward_time=0.419, grad_norm=29.018, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.093, optim0_lr0=3.374e-04, train_time=1.448 +[gpub078:0/16] 2024-01-25 10:30:32,761 (trainer:737) INFO: 9epoch:train:11801-11900batch: iter_time=8.653e-05, forward_time=0.383, loss_ctc=54.853, loss_att=59.214, acc=0.691, loss=57.905, backward_time=0.416, grad_norm=28.930, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.111, optim0_lr0=3.373e-04, train_time=1.788 +[gpub078:0/16] 2024-01-25 10:33:35,101 (trainer:737) INFO: 9epoch:train:11901-12000batch: iter_time=8.581e-05, forward_time=0.320, loss_ctc=59.538, loss_att=57.115, acc=0.687, loss=57.842, backward_time=0.427, grad_norm=29.791, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.093, optim0_lr0=3.372e-04, train_time=1.823 +[gpub078:0/16] 2024-01-25 10:36:26,087 (trainer:737) INFO: 9epoch:train:12001-12100batch: iter_time=8.798e-05, forward_time=0.423, loss_ctc=55.999, loss_att=48.103, acc=0.716, loss=50.472, backward_time=0.423, grad_norm=29.659, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.105, optim0_lr0=3.370e-04, train_time=1.709 +[gpub078:0/16] 2024-01-25 10:39:00,352 (trainer:737) INFO: 9epoch:train:12101-12200batch: 
iter_time=8.126e-05, forward_time=0.289, loss_ctc=55.876, loss_att=55.290, acc=0.697, loss=55.466, backward_time=0.401, grad_norm=28.971, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.369e-04, train_time=1.543 +[gpub078:0/16] 2024-01-25 10:41:58,556 (trainer:737) INFO: 9epoch:train:12201-12300batch: iter_time=4.306e-04, forward_time=0.416, loss_ctc=56.790, loss_att=58.385, acc=0.673, loss=57.907, backward_time=0.429, grad_norm=29.538, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.116, optim0_lr0=3.368e-04, train_time=1.782 +[gpub078:0/16] 2024-01-25 10:45:01,343 (trainer:737) INFO: 9epoch:train:12301-12400batch: iter_time=8.531e-05, forward_time=0.289, loss_ctc=55.117, loss_att=50.474, acc=0.735, loss=51.866, backward_time=0.402, grad_norm=27.987, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.367e-04, train_time=1.827 +[gpub078:0/16] 2024-01-25 10:47:14,470 (trainer:737) INFO: 9epoch:train:12401-12500batch: iter_time=8.443e-05, forward_time=0.293, loss_ctc=63.604, loss_att=70.626, acc=0.667, loss=68.520, backward_time=0.407, grad_norm=33.475, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.365e-04, train_time=1.331 +[gpub078:0/16] 2024-01-25 10:47:34,497 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub078:0/16] 2024-01-25 10:47:54,331 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 10:47:58,049 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 10:47:58,050 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub078:0/16] 2024-01-25 10:47:58,053 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 10:55:29,655 (trainer:737) INFO: 9epoch:train:12501-12600batch: iter_time=3.283, forward_time=0.390, loss_ctc=54.564, loss_att=52.655, acc=0.701, loss=53.228, backward_time=0.458, grad_norm=30.055, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.103, optim0_lr0=3.364e-04, train_time=4.951 +[gpub078:0/16] 2024-01-25 10:58:20,977 (trainer:737) INFO: 9epoch:train:12601-12700batch: iter_time=8.853e-05, forward_time=0.287, loss_ctc=53.200, loss_att=51.321, acc=0.691, loss=51.885, backward_time=0.399, grad_norm=30.310, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.363e-04, train_time=1.713 +[gpub078:0/16] 2024-01-25 11:00:50,260 (trainer:737) INFO: 9epoch:train:12701-12800batch: iter_time=1.068e-04, forward_time=0.288, loss_ctc=49.358, loss_att=51.372, acc=0.675, loss=50.768, backward_time=0.398, grad_norm=28.307, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.094, optim0_lr0=3.361e-04, train_time=1.493 +[gpub078:0/16] 2024-01-25 11:03:11,521 (trainer:737) 
INFO: 9epoch:train:12801-12900batch: iter_time=1.118e-04, forward_time=0.358, loss_ctc=56.689, loss_att=58.434, acc=0.695, loss=57.910, backward_time=0.424, grad_norm=29.067, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.097, optim0_lr0=3.360e-04, train_time=1.412 +[gpub078:0/16] 2024-01-25 11:05:53,141 (trainer:737) INFO: 9epoch:train:12901-13000batch: iter_time=1.018e-04, forward_time=0.358, loss_ctc=51.818, loss_att=47.390, acc=0.726, loss=48.719, backward_time=0.419, grad_norm=26.886, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.095, optim0_lr0=3.359e-04, train_time=1.616 +[gpub078:0/16] 2024-01-25 11:08:49,293 (trainer:737) INFO: 9epoch:train:13001-13100batch: iter_time=1.121e-04, forward_time=0.289, loss_ctc=58.388, loss_att=57.017, acc=0.696, loss=57.428, backward_time=0.402, grad_norm=28.188, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.358e-04, train_time=1.761 +[gpub078:0/16] 2024-01-25 11:11:08,823 (trainer:737) INFO: 9epoch:train:13101-13200batch: iter_time=1.107e-04, forward_time=0.358, loss_ctc=53.807, loss_att=53.468, acc=0.683, loss=53.570, backward_time=0.409, grad_norm=29.695, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.356e-04, train_time=1.395 +[gpub078:0/16] 2024-01-25 11:13:51,919 (trainer:737) INFO: 9epoch:train:13201-13300batch: iter_time=9.248e-05, forward_time=0.366, loss_ctc=61.383, loss_att=52.710, acc=0.696, loss=55.312, backward_time=0.432, grad_norm=29.135, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.103, optim0_lr0=3.355e-04, train_time=1.631 +[gpub078:0/16] 2024-01-25 11:16:38,492 (trainer:737) INFO: 9epoch:train:13301-13400batch: iter_time=8.456e-05, forward_time=0.287, loss_ctc=51.004, loss_att=50.948, acc=0.701, loss=50.965, backward_time=0.398, grad_norm=28.868, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.354e-04, train_time=1.665 +[gpub078:0/16] 2024-01-25 11:18:52,708 (trainer:737) INFO: 9epoch:train:13401-13500batch: 
iter_time=8.326e-05, forward_time=0.290, loss_ctc=57.385, loss_att=56.473, acc=0.689, loss=56.747, backward_time=0.401, grad_norm=29.898, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.093, optim0_lr0=3.353e-04, train_time=1.342 +[gpub078:0/16] 2024-01-25 11:21:34,296 (trainer:737) INFO: 9epoch:train:13501-13600batch: iter_time=4.202e-04, forward_time=0.351, loss_ctc=57.018, loss_att=49.954, acc=0.695, loss=52.073, backward_time=0.416, grad_norm=29.647, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.099, optim0_lr0=3.351e-04, train_time=1.616 +[gpub078:0/16] 2024-01-25 11:24:19,365 (trainer:737) INFO: 9epoch:train:13601-13700batch: iter_time=8.228e-05, forward_time=0.341, loss_ctc=61.638, loss_att=63.083, acc=0.687, loss=62.650, backward_time=0.422, grad_norm=32.480, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.097, optim0_lr0=3.350e-04, train_time=1.651 +[gpub078:0/16] 2024-01-25 11:26:01,978 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub078:0/16] 2024-01-25 11:26:21,396 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 11:26:24,956 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 11:26:24,956 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub078:0/16] 2024-01-25 11:26:24,960 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 11:32:50,469 
(trainer:737) INFO: 9epoch:train:13701-13800batch: iter_time=3.436, forward_time=0.379, loss_ctc=51.148, loss_att=56.724, acc=0.683, loss=55.052, backward_time=0.411, grad_norm=26.890, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.096, optim0_lr0=3.349e-04, train_time=5.110 +[gpub078:0/16] 2024-01-25 11:35:16,380 (trainer:737) INFO: 9epoch:train:13801-13900batch: iter_time=8.232e-05, forward_time=0.349, loss_ctc=59.063, loss_att=56.510, acc=0.688, loss=57.276, backward_time=0.443, grad_norm=33.352, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.113, optim0_lr0=3.348e-04, train_time=1.459 +[gpub078:0/16] 2024-01-25 11:38:34,495 (trainer:737) INFO: 9epoch:train:13901-14000batch: iter_time=8.523e-05, forward_time=0.338, loss_ctc=47.114, loss_att=45.355, acc=0.700, loss=45.883, backward_time=0.466, grad_norm=25.710, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.096, optim0_lr0=3.346e-04, train_time=1.981 +[gpub078:0/16] 2024-01-25 11:41:09,467 (trainer:737) INFO: 9epoch:train:14001-14100batch: iter_time=9.229e-05, forward_time=0.290, loss_ctc=57.197, loss_att=56.727, acc=0.691, loss=56.868, backward_time=0.403, grad_norm=30.139, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.345e-04, train_time=1.549 +[gpub078:0/16] 2024-01-25 11:43:59,061 (trainer:737) INFO: 9epoch:train:14101-14200batch: iter_time=4.948e-04, forward_time=0.380, loss_ctc=51.924, loss_att=46.092, acc=0.723, loss=47.841, backward_time=0.475, grad_norm=28.079, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.103, optim0_lr0=3.344e-04, train_time=1.696 +[gpub078:0/16] 2024-01-25 11:46:34,395 (trainer:737) INFO: 9epoch:train:14201-14300batch: iter_time=9.066e-05, forward_time=0.290, loss_ctc=57.630, loss_att=55.143, acc=0.705, loss=55.889, backward_time=0.400, grad_norm=28.793, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.343e-04, train_time=1.553 +[gpub078:0/16] 2024-01-25 11:49:07,864 (trainer:737) INFO: 
9epoch:train:14301-14400batch: iter_time=9.952e-05, forward_time=0.307, loss_ctc=54.512, loss_att=57.459, acc=0.685, loss=56.575, backward_time=0.401, grad_norm=30.301, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.341e-04, train_time=1.534 +[gpub078:0/16] 2024-01-25 11:52:04,291 (trainer:737) INFO: 9epoch:train:14401-14500batch: iter_time=9.332e-05, forward_time=0.423, loss_ctc=59.007, loss_att=55.309, acc=0.675, loss=56.418, backward_time=0.428, grad_norm=32.139, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.110, optim0_lr0=3.340e-04, train_time=1.764 +[gpub078:0/16] 2024-01-25 11:55:00,955 (trainer:737) INFO: 9epoch:train:14501-14600batch: iter_time=1.013e-04, forward_time=0.290, loss_ctc=55.826, loss_att=45.322, acc=0.717, loss=48.474, backward_time=0.399, grad_norm=27.729, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.339e-04, train_time=1.767 +[gpub078:0/16] 2024-01-25 11:57:20,738 (trainer:737) INFO: 9epoch:train:14601-14700batch: iter_time=9.664e-05, forward_time=0.289, loss_ctc=55.780, loss_att=54.998, acc=0.693, loss=55.232, backward_time=0.402, grad_norm=29.989, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.094, optim0_lr0=3.338e-04, train_time=1.398 +[gpub078:0/16] 2024-01-25 12:00:29,635 (trainer:737) INFO: 9epoch:train:14701-14800batch: iter_time=4.398e-04, forward_time=0.356, loss_ctc=55.934, loss_att=55.828, acc=0.673, loss=55.860, backward_time=0.515, grad_norm=75.187, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.104, optim0_lr0=3.336e-04, train_time=1.889 +[gpub078:0/16] 2024-01-25 12:03:27,691 (trainer:737) INFO: 9epoch:train:14801-14900batch: iter_time=9.342e-05, forward_time=0.289, loss_ctc=54.574, loss_att=52.167, acc=0.720, loss=52.889, backward_time=0.402, grad_norm=27.618, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.335e-04, train_time=1.780 +[gpub078:0/16] 2024-01-25 12:05:40,695 (trainer:737) INFO: 9epoch:train:14901-15000batch: 
iter_time=8.960e-05, forward_time=0.291, loss_ctc=62.961, loss_att=67.609, acc=0.662, loss=66.215, backward_time=0.406, grad_norm=33.629, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.334e-04, train_time=1.330 +[gpub078:0/16] 2024-01-25 12:44:00,505 (trainer:343) INFO: 9epoch results: [train] iter_time=0.284, forward_time=0.325, loss_ctc=57.321, loss_att=55.734, acc=0.690, loss=56.210, backward_time=0.416, grad_norm=30.458, clip=100.000, loss_scale=1.924e+24, optim_step_time=0.097, optim0_lr0=3.431e-04, train_time=1.874, time=7 hours, 49 minutes and 4.66 seconds, total_count=165000, gpu_max_cached_mem_GB=40.762, [valid] loss_ctc=52.523, cer_ctc=0.260, loss_att=49.861, acc=0.606, cer=0.363, wer=0.998, loss=50.659, time=37 minutes and 53.12 seconds, total_count=51381, gpu_max_cached_mem_GB=40.762 +[gpub078:0/16] 2024-01-25 12:44:11,030 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub078:0/16] 2024-01-25 12:44:11,085 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/4epoch.pth +[gpub078:0/16] 2024-01-25 12:44:11,085 (trainer:272) INFO: 10/45epoch started. Estimated time to finish: 2 weeks, 4 hours and 33 minutes +[gpub078:0/16] 2024-01-25 12:44:11,095 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub078:0/16] 2024-01-25 12:44:29,038 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 12:44:32,480 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 12:44:32,480 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub078:0/16] 2024-01-25 12:44:32,483 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 12:52:27,300 (trainer:737) INFO: 10epoch:train:1-100batch: iter_time=3.326, forward_time=0.347, loss_ctc=61.164, loss_att=60.284, acc=0.701, loss=60.548, backward_time=0.426, grad_norm=33.879, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.096, optim0_lr0=3.333e-04, train_time=4.962 +[gpub078:0/16] 2024-01-25 12:54:46,473 (trainer:737) INFO: 10epoch:train:101-200batch: iter_time=9.899e-05, forward_time=0.303, loss_ctc=60.816, loss_att=57.712, acc=0.711, loss=58.643, backward_time=0.408, grad_norm=30.739, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.095, optim0_lr0=3.331e-04, train_time=1.392 +[gpub078:0/16] 2024-01-25 12:57:11,818 (trainer:737) INFO: 10epoch:train:201-300batch: iter_time=9.762e-05, forward_time=0.342, loss_ctc=46.049, loss_att=43.950, acc=0.705, loss=44.580, backward_time=0.424, grad_norm=24.308, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.096, optim0_lr0=3.330e-04, train_time=1.453 +[gpub078:0/16] 2024-01-25 13:00:13,974 (trainer:737) INFO: 
10epoch:train:301-400batch: iter_time=1.807e-04, forward_time=0.325, loss_ctc=65.874, loss_att=69.942, acc=0.689, loss=68.721, backward_time=0.463, grad_norm=31.780, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.101, optim0_lr0=3.329e-04, train_time=1.822 +[gpub078:0/16] 2024-01-25 13:02:53,561 (trainer:737) INFO: 10epoch:train:401-500batch: iter_time=1.076e-04, forward_time=0.303, loss_ctc=53.708, loss_att=56.163, acc=0.697, loss=55.427, backward_time=0.401, grad_norm=29.509, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.328e-04, train_time=1.596 +[gpub078:0/16] 2024-01-25 13:05:44,460 (trainer:737) INFO: 10epoch:train:501-600batch: iter_time=9.996e-05, forward_time=0.373, loss_ctc=58.052, loss_att=55.332, acc=0.726, loss=56.148, backward_time=0.419, grad_norm=27.823, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.097, optim0_lr0=3.327e-04, train_time=1.709 +[gpub078:0/16] 2024-01-25 13:08:46,222 (trainer:737) INFO: 10epoch:train:601-700batch: iter_time=9.820e-05, forward_time=0.364, loss_ctc=60.374, loss_att=56.752, acc=0.699, loss=57.838, backward_time=0.455, grad_norm=30.758, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.111, optim0_lr0=3.325e-04, train_time=1.816 +[gpub078:0/16] 2024-01-25 13:11:27,669 (trainer:737) INFO: 10epoch:train:701-800batch: iter_time=9.730e-05, forward_time=0.307, loss_ctc=53.848, loss_att=56.729, acc=0.692, loss=55.864, backward_time=0.417, grad_norm=28.549, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.093, optim0_lr0=3.324e-04, train_time=1.616 +[gpub078:0/16] 2024-01-25 13:14:09,543 (trainer:737) INFO: 10epoch:train:801-900batch: iter_time=1.025e-04, forward_time=0.298, loss_ctc=71.660, loss_att=64.594, acc=0.689, loss=66.714, backward_time=0.403, grad_norm=39.998, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.094, optim0_lr0=3.323e-04, train_time=1.619 +[gpub078:0/16] 2024-01-25 13:17:17,690 (trainer:737) INFO: 10epoch:train:901-1000batch: iter_time=1.049e-04, 
forward_time=0.378, loss_ctc=72.047, loss_att=71.813, acc=0.682, loss=71.884, backward_time=0.441, grad_norm=37.312, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.102, optim0_lr0=3.322e-04, train_time=1.881 +[gpub078:0/16] 2024-01-25 13:19:44,181 (trainer:737) INFO: 10epoch:train:1001-1100batch: iter_time=9.584e-05, forward_time=0.314, loss_ctc=53.015, loss_att=42.278, acc=0.733, loss=45.499, backward_time=0.420, grad_norm=24.804, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.098, optim0_lr0=3.320e-04, train_time=1.463 +[gpub078:0/16] 2024-01-25 13:22:11,413 (trainer:737) INFO: 10epoch:train:1101-1200batch: iter_time=8.353e-05, forward_time=0.302, loss_ctc=64.718, loss_att=68.031, acc=0.683, loss=67.037, backward_time=0.403, grad_norm=33.193, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.093, optim0_lr0=3.319e-04, train_time=1.473 +[gpub078:0/16] 2024-01-25 13:23:59,653 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub078:0/16] 2024-01-25 13:24:18,873 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 13:24:22,362 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 13:24:22,362 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub078:0/16] 2024-01-25 13:24:22,365 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 13:31:00,387 (trainer:737) INFO: 
10epoch:train:1201-1300batch: iter_time=3.556, forward_time=0.386, loss_ctc=57.726, loss_att=53.951, acc=0.715, loss=55.084, backward_time=0.407, grad_norm=29.230, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.095, optim0_lr0=3.318e-04, train_time=5.290 +[gpub078:0/16] 2024-01-25 13:33:13,835 (trainer:737) INFO: 10epoch:train:1301-1400batch: iter_time=7.824e-05, forward_time=0.290, loss_ctc=58.052, loss_att=55.153, acc=0.690, loss=56.023, backward_time=0.402, grad_norm=31.450, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.093, optim0_lr0=3.317e-04, train_time=1.334 +[gpub078:0/16] 2024-01-25 13:35:40,626 (trainer:737) INFO: 10epoch:train:1401-1500batch: iter_time=8.151e-05, forward_time=0.311, loss_ctc=51.892, loss_att=50.922, acc=0.705, loss=51.213, backward_time=0.440, grad_norm=28.033, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.098, optim0_lr0=3.316e-04, train_time=1.467 +[gpub078:0/16] 2024-01-25 13:38:38,421 (trainer:737) INFO: 10epoch:train:1501-1600batch: iter_time=9.115e-05, forward_time=0.298, loss_ctc=50.366, loss_att=51.516, acc=0.690, loss=51.171, backward_time=0.400, grad_norm=26.154, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.094, optim0_lr0=3.314e-04, train_time=1.778 +[gpub078:0/16] 2024-01-25 13:41:37,348 (trainer:737) INFO: 10epoch:train:1601-1700batch: iter_time=9.326e-05, forward_time=0.402, loss_ctc=58.370, loss_att=63.438, acc=0.691, loss=61.918, backward_time=0.424, grad_norm=30.360, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.098, optim0_lr0=3.313e-04, train_time=1.789 +[gpub078:0/16] 2024-01-25 13:44:16,704 (trainer:737) INFO: 10epoch:train:1701-1800batch: iter_time=9.065e-05, forward_time=0.337, loss_ctc=59.927, loss_att=59.963, acc=0.697, loss=59.952, backward_time=0.423, grad_norm=30.719, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.106, optim0_lr0=3.312e-04, train_time=1.593 +[gpub078:0/16] 2024-01-25 13:47:16,069 (trainer:737) INFO: 10epoch:train:1801-1900batch: 
iter_time=9.025e-05, forward_time=0.289, loss_ctc=58.170, loss_att=51.648, acc=0.701, loss=53.605, backward_time=0.405, grad_norm=30.432, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.093, optim0_lr0=3.311e-04, train_time=1.794 +[gpub078:0/16] 2024-01-25 13:49:30,103 (trainer:737) INFO: 10epoch:train:1901-2000batch: iter_time=8.646e-05, forward_time=0.302, loss_ctc=51.604, loss_att=50.309, acc=0.710, loss=50.698, backward_time=0.401, grad_norm=27.550, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.094, optim0_lr0=3.309e-04, train_time=1.341 +[gpub078:0/16] 2024-01-25 13:52:27,170 (trainer:737) INFO: 10epoch:train:2001-2100batch: iter_time=9.998e-05, forward_time=0.368, loss_ctc=60.219, loss_att=61.824, acc=0.679, loss=61.342, backward_time=0.410, grad_norm=30.773, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.100, optim0_lr0=3.308e-04, train_time=1.770 +[gpub078:0/16] 2024-01-25 13:55:04,386 (trainer:737) INFO: 10epoch:train:2101-2200batch: iter_time=9.362e-05, forward_time=0.326, loss_ctc=73.195, loss_att=63.205, acc=0.688, loss=66.202, backward_time=0.432, grad_norm=40.934, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.097, optim0_lr0=3.307e-04, train_time=1.572 +[gpub078:0/16] 2024-01-25 13:57:36,631 (trainer:737) INFO: 10epoch:train:2201-2300batch: iter_time=9.318e-05, forward_time=0.289, loss_ctc=58.035, loss_att=59.891, acc=0.699, loss=59.334, backward_time=0.401, grad_norm=28.538, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.093, optim0_lr0=3.306e-04, train_time=1.521 +[gpub078:0/16] 2024-01-25 13:59:56,316 (trainer:737) INFO: 10epoch:train:2301-2400batch: iter_time=8.984e-05, forward_time=0.297, loss_ctc=59.864, loss_att=52.565, acc=0.702, loss=54.754, backward_time=0.401, grad_norm=30.831, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.093, optim0_lr0=3.305e-04, train_time=1.398 +[gpub078:0/16] 2024-01-25 14:02:31,532 (trainer:737) INFO: 10epoch:train:2401-2500batch: iter_time=9.576e-05, forward_time=0.333, 
loss_ctc=54.866, loss_att=54.102, acc=0.699, loss=54.331, backward_time=0.447, grad_norm=29.434, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.099, optim0_lr0=3.303e-04, train_time=1.552 +[gpub078:0/16] 2024-01-25 14:02:51,565 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub078:0/16] 2024-01-25 14:03:10,474 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 14:03:14,104 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 14:03:14,104 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub078:0/16] 2024-01-25 14:03:14,107 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 14:11:21,755 (trainer:737) INFO: 10epoch:train:2501-2600batch: iter_time=3.802, forward_time=0.326, loss_ctc=59.893, loss_att=57.260, acc=0.698, loss=58.050, backward_time=0.416, grad_norm=29.737, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.095, optim0_lr0=3.302e-04, train_time=5.302 +[gpub078:0/16] 2024-01-25 14:13:39,872 (trainer:737) INFO: 10epoch:train:2601-2700batch: iter_time=7.692e-05, forward_time=0.291, loss_ctc=57.962, loss_att=55.679, acc=0.706, loss=56.364, backward_time=0.403, grad_norm=31.098, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.093, optim0_lr0=3.301e-04, train_time=1.381 +[gpub078:0/16] 2024-01-25 14:16:15,004 (trainer:737) INFO: 10epoch:train:2701-2800batch: 
iter_time=7.921e-05, forward_time=0.297, loss_ctc=44.274, loss_att=42.423, acc=0.702, loss=42.978, backward_time=0.396, grad_norm=23.932, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.094, optim0_lr0=3.300e-04, train_time=1.552 +[gpub078:0/16] 2024-01-25 14:19:22,280 (trainer:737) INFO: 10epoch:train:2801-2900batch: iter_time=8.719e-05, forward_time=0.385, loss_ctc=63.750, loss_att=67.663, acc=0.684, loss=66.490, backward_time=0.453, grad_norm=31.926, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.101, optim0_lr0=3.299e-04, train_time=1.872 +[gpub078:0/16] 2024-01-25 14:21:50,661 (trainer:737) INFO: 10epoch:train:2901-3000batch: iter_time=8.508e-05, forward_time=0.288, loss_ctc=52.986, loss_att=52.874, acc=0.698, loss=52.908, backward_time=0.401, grad_norm=31.178, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.093, optim0_lr0=3.297e-04, train_time=1.484 +[gpub078:0/16] 2024-01-25 14:24:08,549 (trainer:737) INFO: 10epoch:train:3001-3100batch: iter_time=8.173e-05, forward_time=0.297, loss_ctc=55.249, loss_att=52.877, acc=0.725, loss=53.589, backward_time=0.404, grad_norm=27.283, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.093, optim0_lr0=3.296e-04, train_time=1.378 +[gpub078:0/16] 2024-01-25 14:26:49,807 (trainer:737) INFO: 10epoch:train:3101-3200batch: iter_time=9.990e-05, forward_time=0.349, loss_ctc=58.435, loss_att=54.958, acc=0.698, loss=56.001, backward_time=0.408, grad_norm=30.684, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.096, optim0_lr0=3.295e-04, train_time=1.613 +[gpub078:0/16] 2024-01-25 14:29:43,299 (trainer:737) INFO: 10epoch:train:3201-3300batch: iter_time=1.905e-04, forward_time=0.327, loss_ctc=52.467, loss_att=54.359, acc=0.693, loss=53.791, backward_time=0.442, grad_norm=26.966, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.096, optim0_lr0=3.294e-04, train_time=1.734 +[gpub078:0/16] 2024-01-25 14:32:12,501 (trainer:737) INFO: 10epoch:train:3301-3400batch: iter_time=8.749e-05, forward_time=0.290, 
loss_ctc=65.804, loss_att=62.351, acc=0.685, loss=63.387, backward_time=0.404, grad_norm=36.947, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.093, optim0_lr0=3.293e-04, train_time=1.492 +[gpub078:0/16] 2024-01-25 14:34:27,332 (trainer:737) INFO: 10epoch:train:3401-3500batch: iter_time=1.139e-04, forward_time=0.299, loss_ctc=68.702, loss_att=69.961, acc=0.673, loss=69.583, backward_time=0.405, grad_norm=37.000, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.093, optim0_lr0=3.292e-04, train_time=1.348 +[gpub078:0/16] 2024-01-25 14:37:10,432 (trainer:737) INFO: 10epoch:train:3501-3600batch: iter_time=1.007e-04, forward_time=0.382, loss_ctc=52.384, loss_att=41.427, acc=0.735, loss=44.714, backward_time=0.415, grad_norm=25.069, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.098, optim0_lr0=3.290e-04, train_time=1.631 +[gpub078:0/16] 2024-01-25 14:40:10,712 (trainer:737) INFO: 10epoch:train:3601-3700batch: iter_time=8.486e-05, forward_time=0.326, loss_ctc=61.927, loss_att=64.510, acc=0.686, loss=63.735, backward_time=0.424, grad_norm=34.030, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.098, optim0_lr0=3.289e-04, train_time=1.803 +[gpub078:0/16] 2024-01-25 14:41:36,722 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub078:0/16] 2024-01-25 14:41:55,728 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 14:41:59,266 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 14:41:59,266 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub078:0/16] 2024-01-25 14:41:59,269 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 14:48:23,395 (trainer:737) INFO: 10epoch:train:3701-3800batch: iter_time=3.523, forward_time=0.293, loss_ctc=56.582, loss_att=54.478, acc=0.717, loss=55.109, backward_time=0.402, grad_norm=27.827, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.093, optim0_lr0=3.288e-04, train_time=4.926 +[gpub078:0/16] 2024-01-25 14:51:01,235 (trainer:737) INFO: 10epoch:train:3801-3900batch: iter_time=8.743e-05, forward_time=0.287, loss_ctc=57.131, loss_att=54.384, acc=0.707, loss=55.208, backward_time=0.400, grad_norm=29.491, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.093, optim0_lr0=3.287e-04, train_time=1.578 +[gpub078:0/16] 2024-01-25 14:53:41,254 (trainer:737) INFO: 10epoch:train:3901-4000batch: iter_time=6.340e-04, forward_time=0.420, loss_ctc=50.925, loss_att=50.353, acc=0.717, loss=50.525, backward_time=0.447, grad_norm=27.148, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.099, optim0_lr0=3.286e-04, train_time=1.601 +[gpub078:0/16] 2024-01-25 14:56:05,821 (trainer:737) INFO: 
10epoch:train:4001-4100batch: iter_time=8.734e-05, forward_time=0.289, loss_ctc=49.802, loss_att=51.380, acc=0.706, loss=50.906, backward_time=0.402, grad_norm=26.233, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.093, optim0_lr0=3.284e-04, train_time=1.445 +[gpub078:0/16] 2024-01-25 14:58:54,985 (trainer:737) INFO: 10epoch:train:4101-4200batch: iter_time=8.404e-05, forward_time=0.296, loss_ctc=56.742, loss_att=64.792, acc=0.700, loss=62.377, backward_time=0.403, grad_norm=31.620, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.093, optim0_lr0=3.283e-04, train_time=1.691 +[gpub078:0/16] 2024-01-25 15:01:40,139 (trainer:737) INFO: 10epoch:train:4201-4300batch: iter_time=8.835e-05, forward_time=0.404, loss_ctc=58.461, loss_att=59.609, acc=0.708, loss=59.264, backward_time=0.417, grad_norm=30.181, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.097, optim0_lr0=3.282e-04, train_time=1.652 +[gpub078:0/16] 2024-01-25 15:04:17,817 (trainer:737) INFO: 10epoch:train:4301-4400batch: iter_time=8.586e-05, forward_time=0.375, loss_ctc=57.453, loss_att=51.547, acc=0.714, loss=53.319, backward_time=0.441, grad_norm=29.176, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.095, optim0_lr0=3.281e-04, train_time=1.577 +[gpub078:0/16] 2024-01-25 15:07:15,881 (trainer:737) INFO: 10epoch:train:4401-4500batch: iter_time=9.994e-05, forward_time=0.295, loss_ctc=51.351, loss_att=51.542, acc=0.714, loss=51.484, backward_time=0.400, grad_norm=27.321, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.094, optim0_lr0=3.280e-04, train_time=1.780 +[gpub078:0/16] 2024-01-25 15:09:44,934 (trainer:737) INFO: 10epoch:train:4501-4600batch: iter_time=9.178e-05, forward_time=0.366, loss_ctc=59.152, loss_att=61.081, acc=0.695, loss=60.502, backward_time=0.450, grad_norm=31.683, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.099, optim0_lr0=3.279e-04, train_time=1.490 +[gpub078:0/16] 2024-01-25 15:12:03,114 (trainer:737) INFO: 10epoch:train:4601-4700batch: 
iter_time=9.028e-05, forward_time=0.310, loss_ctc=69.131, loss_att=62.155, acc=0.701, loss=64.248, backward_time=0.429, grad_norm=40.100, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.096, optim0_lr0=3.277e-04, train_time=1.382 +[gpub078:0/16] 2024-01-25 15:15:19,017 (trainer:737) INFO: 10epoch:train:4701-4800batch: iter_time=9.465e-05, forward_time=0.303, loss_ctc=57.065, loss_att=60.333, acc=0.710, loss=59.352, backward_time=0.412, grad_norm=28.948, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.094, optim0_lr0=3.276e-04, train_time=1.959 +[gpub078:0/16] 2024-01-25 15:17:52,339 (trainer:737) INFO: 10epoch:train:4801-4900batch: iter_time=8.924e-05, forward_time=0.397, loss_ctc=59.768, loss_att=54.309, acc=0.707, loss=55.947, backward_time=0.429, grad_norm=32.311, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.102, optim0_lr0=3.275e-04, train_time=1.533 +[gpub078:0/16] 2024-01-25 15:20:37,474 (trainer:737) INFO: 10epoch:train:4901-5000batch: iter_time=9.122e-05, forward_time=0.325, loss_ctc=54.179, loss_att=53.790, acc=0.710, loss=53.907, backward_time=0.405, grad_norm=30.023, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.097, optim0_lr0=3.274e-04, train_time=1.651 +[gpub078:0/16] 2024-01-25 15:20:57,644 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub078:0/16] 2024-01-25 15:21:16,469 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 15:21:20,229 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 15:21:20,230 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub078:0/16] 2024-01-25 15:21:20,235 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 15:29:12,537 (trainer:737) INFO: 10epoch:train:5001-5100batch: iter_time=3.494, forward_time=0.337, loss_ctc=58.630, loss_att=58.305, acc=0.699, loss=58.403, backward_time=0.428, grad_norm=29.512, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.094, optim0_lr0=3.273e-04, train_time=5.151 +[gpub078:0/16] 2024-01-25 15:32:00,511 (trainer:737) INFO: 10epoch:train:5101-5200batch: iter_time=9.257e-05, forward_time=0.289, loss_ctc=57.155, loss_att=55.631, acc=0.708, loss=56.088, backward_time=0.404, grad_norm=31.581, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.271e-04, train_time=1.680 +[gpub078:0/16] 2024-01-25 15:34:45,572 (trainer:737) INFO: 10epoch:train:5201-5300batch: iter_time=1.024e-04, forward_time=0.356, loss_ctc=43.598, loss_att=42.672, acc=0.702, loss=42.950, backward_time=0.433, grad_norm=24.737, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.106, optim0_lr0=3.270e-04, train_time=1.650 +[gpub078:0/16] 2024-01-25 15:37:28,001 (trainer:737) INFO: 
10epoch:train:5301-5400batch: iter_time=1.002e-04, forward_time=0.324, loss_ctc=62.766, loss_att=67.227, acc=0.688, loss=65.889, backward_time=0.432, grad_norm=30.619, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.095, optim0_lr0=3.269e-04, train_time=1.624 +[gpub078:0/16] 2024-01-25 15:39:48,077 (trainer:737) INFO: 10epoch:train:5401-5500batch: iter_time=9.045e-05, forward_time=0.292, loss_ctc=52.315, loss_att=51.894, acc=0.704, loss=52.020, backward_time=0.401, grad_norm=27.525, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.268e-04, train_time=1.401 +[gpub078:0/16] 2024-01-25 15:43:08,424 (trainer:737) INFO: 10epoch:train:5501-5600batch: iter_time=8.302e-05, forward_time=0.368, loss_ctc=54.987, loss_att=52.710, acc=0.729, loss=53.393, backward_time=0.412, grad_norm=27.354, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.094, optim0_lr0=3.267e-04, train_time=2.003 +[gpub078:0/16] 2024-01-25 15:45:21,686 (trainer:737) INFO: 10epoch:train:5601-5700batch: iter_time=9.173e-05, forward_time=0.290, loss_ctc=57.089, loss_att=53.887, acc=0.705, loss=54.848, backward_time=0.402, grad_norm=29.115, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.266e-04, train_time=1.332 +[gpub078:0/16] 2024-01-25 15:48:14,333 (trainer:737) INFO: 10epoch:train:5701-5800batch: iter_time=9.057e-05, forward_time=0.328, loss_ctc=52.520, loss_att=54.565, acc=0.693, loss=53.951, backward_time=0.420, grad_norm=28.059, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.099, optim0_lr0=3.265e-04, train_time=1.726 +[gpub078:0/16] 2024-01-25 15:50:34,756 (trainer:737) INFO: 10epoch:train:5801-5900batch: iter_time=8.857e-05, forward_time=0.313, loss_ctc=65.293, loss_att=62.814, acc=0.683, loss=63.558, backward_time=0.401, grad_norm=35.714, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.263e-04, train_time=1.404 +[gpub078:0/16] 2024-01-25 15:53:18,986 (trainer:737) INFO: 10epoch:train:5901-6000batch: 
iter_time=9.542e-05, forward_time=0.345, loss_ctc=66.342, loss_att=69.279, acc=0.676, loss=68.398, backward_time=0.486, grad_norm=36.251, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.100, optim0_lr0=3.262e-04, train_time=1.642 +[gpub078:0/16] 2024-01-25 15:56:16,240 (trainer:737) INFO: 10epoch:train:6001-6100batch: iter_time=9.344e-05, forward_time=0.289, loss_ctc=51.954, loss_att=40.915, acc=0.738, loss=44.227, backward_time=0.399, grad_norm=26.116, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.261e-04, train_time=1.772 +[gpub078:0/16] 2024-01-25 15:58:46,515 (trainer:737) INFO: 10epoch:train:6101-6200batch: iter_time=0.002, forward_time=0.323, loss_ctc=61.354, loss_att=63.455, acc=0.691, loss=62.825, backward_time=0.425, grad_norm=31.971, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.097, optim0_lr0=3.260e-04, train_time=1.501 +[gpub078:0/16] 2024-01-25 16:00:15,844 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub078:0/16] 2024-01-25 16:00:35,122 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 16:00:38,801 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 16:00:38,802 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub078:0/16] 2024-01-25 16:00:38,805 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 16:07:45,157 
(trainer:737) INFO: 10epoch:train:6201-6300batch: iter_time=3.827, forward_time=0.382, loss_ctc=56.655, loss_att=53.957, acc=0.718, loss=54.767, backward_time=0.413, grad_norm=28.984, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.095, optim0_lr0=3.259e-04, train_time=5.387 +[gpub078:0/16] 2024-01-25 16:10:27,638 (trainer:737) INFO: 10epoch:train:6301-6400batch: iter_time=9.327e-05, forward_time=0.288, loss_ctc=55.513, loss_att=54.091, acc=0.707, loss=54.518, backward_time=0.400, grad_norm=30.372, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.258e-04, train_time=1.625 +[gpub078:0/16] 2024-01-25 16:13:15,582 (trainer:737) INFO: 10epoch:train:6401-6500batch: iter_time=8.424e-05, forward_time=0.350, loss_ctc=49.819, loss_att=49.603, acc=0.719, loss=49.668, backward_time=0.423, grad_norm=26.540, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.095, optim0_lr0=3.256e-04, train_time=1.679 +[gpub078:0/16] 2024-01-25 16:16:13,840 (trainer:737) INFO: 10epoch:train:6501-6600batch: iter_time=8.405e-05, forward_time=0.291, loss_ctc=49.329, loss_att=50.394, acc=0.709, loss=50.074, backward_time=0.402, grad_norm=26.093, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.255e-04, train_time=1.783 +[gpub078:0/16] 2024-01-25 16:19:04,279 (trainer:737) INFO: 10epoch:train:6601-6700batch: iter_time=2.218e-04, forward_time=0.398, loss_ctc=56.519, loss_att=63.799, acc=0.702, loss=61.615, backward_time=0.423, grad_norm=31.524, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.102, optim0_lr0=3.254e-04, train_time=1.704 +[gpub078:0/16] 2024-01-25 16:21:42,767 (trainer:737) INFO: 10epoch:train:6701-6800batch: iter_time=9.247e-05, forward_time=0.320, loss_ctc=57.644, loss_att=58.663, acc=0.711, loss=58.357, backward_time=0.405, grad_norm=28.913, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.093, optim0_lr0=3.253e-04, train_time=1.585 +[gpub078:0/16] 2024-01-25 16:25:24,582 (trainer:737) INFO: 
10epoch:train:6801-6900batch: iter_time=1.097e-04, forward_time=0.325, loss_ctc=56.287, loss_att=51.735, acc=0.716, loss=53.100, backward_time=0.430, grad_norm=28.213, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.096, optim0_lr0=3.252e-04, train_time=2.218 +[gpub078:0/16] 2024-01-25 16:28:06,068 (trainer:737) INFO: 10epoch:train:6901-7000batch: iter_time=9.316e-05, forward_time=0.409, loss_ctc=50.260, loss_att=51.194, acc=0.713, loss=50.914, backward_time=0.412, grad_norm=26.742, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.104, optim0_lr0=3.251e-04, train_time=1.615 +[gpub078:0/16] 2024-01-25 16:31:08,407 (trainer:737) INFO: 10epoch:train:7001-7100batch: iter_time=9.668e-05, forward_time=0.290, loss_ctc=57.949, loss_att=60.359, acc=0.698, loss=59.636, backward_time=0.402, grad_norm=29.442, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.250e-04, train_time=1.823 +[gpub078:0/16] 2024-01-25 16:33:38,121 (trainer:737) INFO: 10epoch:train:7101-7200batch: iter_time=9.352e-05, forward_time=0.334, loss_ctc=68.503, loss_att=61.789, acc=0.705, loss=63.803, backward_time=0.418, grad_norm=37.747, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.097, optim0_lr0=3.248e-04, train_time=1.496 +[gpub078:0/16] 2024-01-25 16:36:38,183 (trainer:737) INFO: 10epoch:train:7201-7300batch: iter_time=9.451e-05, forward_time=0.347, loss_ctc=54.901, loss_att=60.080, acc=0.714, loss=58.527, backward_time=0.437, grad_norm=28.830, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.104, optim0_lr0=3.247e-04, train_time=1.801 +[gpub078:0/16] 2024-01-25 16:39:17,630 (trainer:737) INFO: 10epoch:train:7301-7400batch: iter_time=8.955e-05, forward_time=0.288, loss_ctc=57.848, loss_att=53.294, acc=0.711, loss=54.660, backward_time=0.400, grad_norm=29.874, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.246e-04, train_time=1.594 +[gpub078:0/16] 2024-01-25 16:41:49,708 (trainer:737) INFO: 10epoch:train:7401-7500batch: 
iter_time=9.283e-05, forward_time=0.288, loss_ctc=52.940, loss_att=52.943, acc=0.714, loss=52.942, backward_time=0.402, grad_norm=27.898, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.245e-04, train_time=1.521 +[gpub078:0/16] 2024-01-25 16:42:09,737 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub078:0/16] 2024-01-25 16:42:28,691 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 16:42:32,150 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 16:42:32,150 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub078:0/16] 2024-01-25 16:42:32,153 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 16:50:11,156 (trainer:737) INFO: 10epoch:train:7501-7600batch: iter_time=3.248, forward_time=0.428, loss_ctc=58.343, loss_att=57.681, acc=0.701, loss=57.879, backward_time=0.428, grad_norm=28.598, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.098, optim0_lr0=3.244e-04, train_time=5.013 +[gpub078:0/16] 2024-01-25 16:52:45,916 (trainer:737) INFO: 10epoch:train:7601-7700batch: iter_time=8.409e-05, forward_time=0.293, loss_ctc=56.441, loss_att=55.579, acc=0.711, loss=55.837, backward_time=0.402, grad_norm=30.569, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.243e-04, train_time=1.548 +[gpub078:0/16] 2024-01-25 16:55:02,508 
(trainer:737) INFO: 10epoch:train:7701-7800batch: iter_time=8.102e-05, forward_time=0.287, loss_ctc=43.519, loss_att=42.565, acc=0.702, loss=42.851, backward_time=0.400, grad_norm=24.531, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.242e-04, train_time=1.366 +[gpub078:0/16] 2024-01-25 16:58:28,520 (trainer:737) INFO: 10epoch:train:7801-7900batch: iter_time=8.736e-05, forward_time=0.383, loss_ctc=61.511, loss_att=65.834, acc=0.691, loss=64.537, backward_time=0.453, grad_norm=30.074, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.103, optim0_lr0=3.240e-04, train_time=2.060 +[gpub078:0/16] 2024-01-25 17:00:43,522 (trainer:737) INFO: 10epoch:train:7901-8000batch: iter_time=9.009e-05, forward_time=0.288, loss_ctc=51.652, loss_att=51.292, acc=0.709, loss=51.400, backward_time=0.400, grad_norm=27.482, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.239e-04, train_time=1.349 +[gpub078:0/16] 2024-01-25 17:03:18,835 (trainer:737) INFO: 10epoch:train:8001-8100batch: iter_time=8.238e-05, forward_time=0.291, loss_ctc=54.452, loss_att=52.067, acc=0.731, loss=52.782, backward_time=0.404, grad_norm=27.877, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.238e-04, train_time=1.553 +[gpub078:0/16] 2024-01-25 17:05:53,357 (trainer:737) INFO: 10epoch:train:8101-8200batch: iter_time=9.360e-05, forward_time=0.289, loss_ctc=56.752, loss_att=53.267, acc=0.707, loss=54.313, backward_time=0.401, grad_norm=30.539, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.237e-04, train_time=1.545 +[gpub078:0/16] 2024-01-25 17:09:57,153 (trainer:737) INFO: 10epoch:train:8201-8300batch: iter_time=0.022, forward_time=0.397, loss_ctc=51.272, loss_att=53.642, acc=0.696, loss=52.931, backward_time=0.448, grad_norm=28.062, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.106, optim0_lr0=3.236e-04, train_time=2.438 +[gpub078:0/16] 2024-01-25 17:12:19,393 (trainer:737) INFO: 
10epoch:train:8301-8400batch: iter_time=8.818e-05, forward_time=0.294, loss_ctc=64.208, loss_att=62.517, acc=0.687, loss=63.024, backward_time=0.402, grad_norm=35.446, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.235e-04, train_time=1.422 +[gpub078:0/16] 2024-01-25 17:15:02,659 (trainer:737) INFO: 10epoch:train:8401-8500batch: iter_time=9.320e-05, forward_time=0.291, loss_ctc=64.836, loss_att=68.587, acc=0.679, loss=67.462, backward_time=0.405, grad_norm=36.963, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.234e-04, train_time=1.633 +[gpub078:0/16] 2024-01-25 17:17:39,599 (trainer:737) INFO: 10epoch:train:8501-8600batch: iter_time=9.580e-05, forward_time=0.288, loss_ctc=51.084, loss_att=40.243, acc=0.741, loss=43.495, backward_time=0.400, grad_norm=25.135, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.233e-04, train_time=1.569 +[gpub078:0/16] 2024-01-25 17:20:39,034 (trainer:737) INFO: 10epoch:train:8601-8700batch: iter_time=8.996e-05, forward_time=0.482, loss_ctc=60.526, loss_att=62.773, acc=0.692, loss=62.099, backward_time=0.480, grad_norm=33.893, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.102, optim0_lr0=3.231e-04, train_time=1.794 +[gpub078:0/16] 2024-01-25 17:22:54,315 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub078:0/16] 2024-01-25 17:23:13,352 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 17:23:16,891 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 17:23:16,891 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub078:0/16] 2024-01-25 17:23:16,916 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 17:29:34,427 (trainer:737) INFO: 10epoch:train:8701-8800batch: iter_time=3.498, forward_time=0.289, loss_ctc=56.158, loss_att=53.283, acc=0.721, loss=54.145, backward_time=0.403, grad_norm=29.152, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.230e-04, train_time=5.354 +[gpub078:0/16] 2024-01-25 17:32:09,556 (trainer:737) INFO: 10epoch:train:8801-8900batch: iter_time=9.543e-05, forward_time=0.361, loss_ctc=55.327, loss_att=52.379, acc=0.713, loss=53.263, backward_time=0.432, grad_norm=28.779, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.107, optim0_lr0=3.229e-04, train_time=1.551 +[gpub078:0/16] 2024-01-25 17:35:28,560 (trainer:737) INFO: 10epoch:train:8901-9000batch: iter_time=9.952e-05, forward_time=0.312, loss_ctc=49.431, loss_att=49.439, acc=0.721, loss=49.436, backward_time=0.410, grad_norm=26.886, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.093, optim0_lr0=3.228e-04, train_time=1.990 +[gpub078:0/16] 2024-01-25 17:37:54,185 (trainer:737) INFO: 
10epoch:train:9001-9100batch: iter_time=9.668e-05, forward_time=0.341, loss_ctc=49.025, loss_att=50.203, acc=0.710, loss=49.850, backward_time=0.422, grad_norm=25.343, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.100, optim0_lr0=3.227e-04, train_time=1.456 +[gpub078:0/16] 2024-01-25 17:40:37,857 (trainer:737) INFO: 10epoch:train:9101-9200batch: iter_time=8.916e-05, forward_time=0.347, loss_ctc=55.831, loss_att=63.301, acc=0.704, loss=61.060, backward_time=0.429, grad_norm=30.719, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.096, optim0_lr0=3.226e-04, train_time=1.636 +[gpub078:0/16] 2024-01-25 17:43:16,706 (trainer:737) INFO: 10epoch:train:9201-9300batch: iter_time=8.450e-05, forward_time=0.294, loss_ctc=57.240, loss_att=58.784, acc=0.711, loss=58.321, backward_time=0.406, grad_norm=28.788, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.225e-04, train_time=1.588 +[gpub078:0/16] 2024-01-25 17:46:16,290 (trainer:737) INFO: 10epoch:train:9301-9400batch: iter_time=9.380e-05, forward_time=0.367, loss_ctc=55.382, loss_att=50.140, acc=0.720, loss=51.713, backward_time=0.459, grad_norm=27.118, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.105, optim0_lr0=3.224e-04, train_time=1.796 +[gpub078:0/16] 2024-01-25 17:48:29,360 (trainer:737) INFO: 10epoch:train:9401-9500batch: iter_time=8.899e-05, forward_time=0.289, loss_ctc=49.947, loss_att=50.319, acc=0.719, loss=50.208, backward_time=0.403, grad_norm=26.938, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.222e-04, train_time=1.330 +[gpub078:0/16] 2024-01-25 17:51:03,363 (trainer:737) INFO: 10epoch:train:9501-9600batch: iter_time=1.001e-04, forward_time=0.292, loss_ctc=57.571, loss_att=59.913, acc=0.701, loss=59.211, backward_time=0.405, grad_norm=30.205, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.221e-04, train_time=1.540 +[gpub078:0/16] 2024-01-25 17:54:23,957 (trainer:737) INFO: 10epoch:train:9601-9700batch: 
iter_time=8.929e-05, forward_time=0.404, loss_ctc=66.085, loss_att=61.361, acc=0.704, loss=62.778, backward_time=0.459, grad_norm=37.852, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.103, optim0_lr0=3.220e-04, train_time=2.006 +[gpub078:0/16] 2024-01-25 17:56:52,625 (trainer:737) INFO: 10epoch:train:9701-9800batch: iter_time=9.116e-05, forward_time=0.291, loss_ctc=54.976, loss_att=59.368, acc=0.715, loss=58.051, backward_time=0.404, grad_norm=29.550, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.219e-04, train_time=1.486 +[gpub078:0/16] 2024-01-25 17:59:22,169 (trainer:737) INFO: 10epoch:train:9801-9900batch: iter_time=9.309e-05, forward_time=0.403, loss_ctc=57.203, loss_att=52.772, acc=0.714, loss=54.102, backward_time=0.442, grad_norm=30.211, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.100, optim0_lr0=3.218e-04, train_time=1.495 +[gpub078:0/16] 2024-01-25 18:02:08,528 (trainer:737) INFO: 10epoch:train:9901-10000batch: iter_time=9.984e-05, forward_time=0.291, loss_ctc=53.291, loss_att=52.363, acc=0.716, loss=52.641, backward_time=0.402, grad_norm=30.148, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.217e-04, train_time=1.664 +[gpub078:0/16] 2024-01-25 18:02:28,556 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub078:0/16] 2024-01-25 18:02:47,774 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 18:02:51,688 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 18:02:51,688 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub078:0/16] 2024-01-25 18:02:51,691 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 18:11:08,109 (trainer:737) INFO: 10epoch:train:10001-10100batch: iter_time=3.626, forward_time=0.287, loss_ctc=58.620, loss_att=56.903, acc=0.712, loss=57.418, backward_time=0.400, grad_norm=28.386, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.216e-04, train_time=5.396 +[gpub078:0/16] 2024-01-25 18:14:30,222 (trainer:737) INFO: 10epoch:train:10101-10200batch: iter_time=8.812e-05, forward_time=0.376, loss_ctc=55.597, loss_att=54.459, acc=0.723, loss=54.800, backward_time=0.486, grad_norm=31.624, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.101, optim0_lr0=3.215e-04, train_time=2.021 +[gpub078:0/16] 2024-01-25 18:16:46,510 (trainer:737) INFO: 10epoch:train:10201-10300batch: iter_time=8.826e-05, forward_time=0.288, loss_ctc=43.104, loss_att=41.287, acc=0.720, loss=41.832, backward_time=0.400, grad_norm=24.783, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.214e-04, train_time=1.363 +[gpub078:0/16] 2024-01-25 18:20:09,441 
(trainer:737) INFO: 10epoch:train:10301-10400batch: iter_time=9.505e-05, forward_time=0.293, loss_ctc=61.207, loss_att=66.058, acc=0.701, loss=64.603, backward_time=0.406, grad_norm=30.952, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.212e-04, train_time=2.029 +[gpub078:0/16] 2024-01-25 18:23:08,355 (trainer:737) INFO: 10epoch:train:10401-10500batch: iter_time=9.221e-05, forward_time=0.408, loss_ctc=51.053, loss_att=53.925, acc=0.707, loss=53.063, backward_time=0.477, grad_norm=27.722, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.104, optim0_lr0=3.211e-04, train_time=1.789 +[gpub078:0/16] 2024-01-25 18:25:47,529 (trainer:737) INFO: 10epoch:train:10501-10600batch: iter_time=8.568e-05, forward_time=0.294, loss_ctc=53.839, loss_att=52.421, acc=0.737, loss=52.847, backward_time=0.406, grad_norm=26.369, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.210e-04, train_time=1.592 +[gpub078:0/16] 2024-01-25 18:28:48,392 (trainer:737) INFO: 10epoch:train:10601-10700batch: iter_time=8.747e-05, forward_time=0.394, loss_ctc=55.782, loss_att=54.146, acc=0.711, loss=54.637, backward_time=0.438, grad_norm=30.130, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.117, optim0_lr0=3.209e-04, train_time=1.808 +[gpub078:0/16] 2024-01-25 18:31:17,539 (trainer:737) INFO: 10epoch:train:10701-10800batch: iter_time=9.314e-05, forward_time=0.288, loss_ctc=51.363, loss_att=55.020, acc=0.701, loss=53.923, backward_time=0.401, grad_norm=28.133, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.208e-04, train_time=1.490 +[gpub078:0/16] 2024-01-25 18:33:55,461 (trainer:737) INFO: 10epoch:train:10801-10900batch: iter_time=8.961e-05, forward_time=0.288, loss_ctc=62.513, loss_att=60.580, acc=0.703, loss=61.159, backward_time=0.402, grad_norm=32.759, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.093, optim0_lr0=3.207e-04, train_time=1.580 +[gpub078:0/16] 2024-01-25 18:36:51,894 (trainer:737) INFO: 
10epoch:train:10901-11000batch: iter_time=8.912e-05, forward_time=0.406, loss_ctc=63.814, loss_att=67.533, acc=0.695, loss=66.417, backward_time=0.433, grad_norm=35.415, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.106, optim0_lr0=3.206e-04, train_time=1.764 +[gpub078:0/16] 2024-01-25 18:40:04,881 (trainer:737) INFO: 10epoch:train:11001-11100batch: iter_time=8.794e-05, forward_time=0.287, loss_ctc=50.831, loss_att=40.877, acc=0.739, loss=43.863, backward_time=0.398, grad_norm=24.770, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.205e-04, train_time=1.929 +[gpub078:0/16] 2024-01-25 18:42:35,361 (trainer:737) INFO: 10epoch:train:11101-11200batch: iter_time=8.879e-05, forward_time=0.292, loss_ctc=59.387, loss_att=64.809, acc=0.697, loss=63.182, backward_time=0.402, grad_norm=31.943, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.204e-04, train_time=1.505 +[gpub078:0/16] 2024-01-25 18:44:56,772 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub078:0/16] 2024-01-25 18:45:15,727 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 18:45:19,667 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 18:45:19,667 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub078:0/16] 2024-01-25 18:45:19,670 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 18:52:35,033 (trainer:737) INFO: 10epoch:train:11201-11300batch: iter_time=3.725, forward_time=0.430, loss_ctc=55.915, loss_att=52.446, acc=0.729, loss=53.487, backward_time=0.433, grad_norm=26.860, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.106, optim0_lr0=3.203e-04, train_time=5.996 +[gpub078:0/16] 2024-01-25 18:56:50,970 (trainer:737) INFO: 10epoch:train:11301-11400batch: iter_time=8.831e-05, forward_time=0.384, loss_ctc=55.008, loss_att=51.207, acc=0.714, loss=52.347, backward_time=0.492, grad_norm=28.151, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.102, optim0_lr0=3.201e-04, train_time=2.559 +[gpub078:0/16] 2024-01-25 19:00:19,739 (trainer:737) INFO: 10epoch:train:11401-11500batch: iter_time=8.567e-05, forward_time=0.288, loss_ctc=49.077, loss_att=48.249, acc=0.723, loss=48.498, backward_time=0.398, grad_norm=26.647, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.200e-04, train_time=2.088 +[gpub078:0/16] 2024-01-25 19:03:07,458 (trainer:737) 
INFO: 10epoch:train:11501-11600batch: iter_time=9.608e-05, forward_time=0.289, loss_ctc=48.918, loss_att=49.018, acc=0.714, loss=48.988, backward_time=0.399, grad_norm=24.814, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.199e-04, train_time=1.677 +[gpub078:0/16] 2024-01-25 19:06:36,385 (trainer:737) INFO: 10epoch:train:11601-11700batch: iter_time=9.144e-05, forward_time=0.355, loss_ctc=55.913, loss_att=63.019, acc=0.707, loss=60.887, backward_time=0.508, grad_norm=30.360, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.104, optim0_lr0=3.198e-04, train_time=2.089 +[gpub078:0/16] 2024-01-25 19:09:47,809 (trainer:737) INFO: 10epoch:train:11701-11800batch: iter_time=8.681e-05, forward_time=0.292, loss_ctc=57.431, loss_att=57.751, acc=0.715, loss=57.655, backward_time=0.401, grad_norm=29.153, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.197e-04, train_time=1.914 +[gpub078:0/16] 2024-01-25 19:12:54,023 (trainer:737) INFO: 10epoch:train:11801-11900batch: iter_time=8.619e-05, forward_time=0.342, loss_ctc=55.142, loss_att=50.221, acc=0.721, loss=51.697, backward_time=0.491, grad_norm=26.983, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.099, optim0_lr0=3.196e-04, train_time=1.862 +[gpub078:0/16] 2024-01-25 19:15:51,710 (trainer:737) INFO: 10epoch:train:11901-12000batch: iter_time=7.958e-05, forward_time=0.289, loss_ctc=50.200, loss_att=50.622, acc=0.717, loss=50.495, backward_time=0.398, grad_norm=27.627, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.195e-04, train_time=1.777 +[gpub078:0/16] 2024-01-25 19:20:00,176 (trainer:737) INFO: 10epoch:train:12001-12100batch: iter_time=8.990e-05, forward_time=0.359, loss_ctc=57.100, loss_att=58.939, acc=0.702, loss=58.387, backward_time=0.492, grad_norm=29.696, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.100, optim0_lr0=3.194e-04, train_time=2.484 +[gpub078:0/16] 2024-01-25 19:23:18,870 (trainer:737) INFO: 
10epoch:train:12101-12200batch: iter_time=8.582e-05, forward_time=0.292, loss_ctc=64.869, loss_att=60.269, acc=0.708, loss=61.649, backward_time=0.484, grad_norm=33.772, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.099, optim0_lr0=3.193e-04, train_time=1.986 +[gpub078:0/16] 2024-01-25 19:26:20,930 (trainer:737) INFO: 10epoch:train:12201-12300batch: iter_time=9.559e-05, forward_time=0.331, loss_ctc=54.617, loss_att=58.474, acc=0.716, loss=57.317, backward_time=0.400, grad_norm=29.911, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.192e-04, train_time=1.821 +[gpub078:0/16] 2024-01-25 19:29:41,259 (trainer:737) INFO: 10epoch:train:12301-12400batch: iter_time=9.026e-05, forward_time=0.288, loss_ctc=57.727, loss_att=52.989, acc=0.714, loss=54.410, backward_time=0.397, grad_norm=29.265, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.191e-04, train_time=2.003 +[gpub078:0/16] 2024-01-25 19:33:04,796 (trainer:737) INFO: 10epoch:train:12401-12500batch: iter_time=9.026e-05, forward_time=0.426, loss_ctc=52.636, loss_att=52.950, acc=0.717, loss=52.856, backward_time=0.469, grad_norm=28.595, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.104, optim0_lr0=3.189e-04, train_time=2.035 +[gpub078:0/16] 2024-01-25 19:33:24,976 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub078:0/16] 2024-01-25 19:33:44,032 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 19:33:47,976 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 19:33:47,977 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub078:0/16] 2024-01-25 19:33:47,980 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 19:40:31,594 (trainer:737) INFO: 10epoch:train:12501-12600batch: iter_time=2.860, forward_time=0.288, loss_ctc=58.211, loss_att=58.177, acc=0.703, loss=58.187, backward_time=0.400, grad_norm=30.701, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.188e-04, train_time=4.468 +[gpub078:0/16] 2024-01-25 19:43:30,204 (trainer:737) INFO: 10epoch:train:12601-12700batch: iter_time=8.952e-05, forward_time=0.346, loss_ctc=54.955, loss_att=55.722, acc=0.713, loss=55.492, backward_time=0.460, grad_norm=30.940, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.099, optim0_lr0=3.187e-04, train_time=1.785 +[gpub078:0/16] 2024-01-25 19:46:17,099 (trainer:737) INFO: 10epoch:train:12701-12800batch: iter_time=8.599e-05, forward_time=0.286, loss_ctc=42.784, loss_att=42.645, acc=0.706, loss=42.687, backward_time=0.396, grad_norm=24.979, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.186e-04, train_time=1.669 +[gpub078:0/16] 2024-01-25 19:49:56,967 (trainer:737) 
INFO: 10epoch:train:12801-12900batch: iter_time=8.962e-05, forward_time=0.553, loss_ctc=60.743, loss_att=66.713, acc=0.692, loss=64.922, backward_time=0.447, grad_norm=31.826, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.113, optim0_lr0=3.185e-04, train_time=2.198 +[gpub078:0/16] 2024-01-25 19:53:02,647 (trainer:737) INFO: 10epoch:train:12901-13000batch: iter_time=9.129e-05, forward_time=0.287, loss_ctc=50.612, loss_att=51.411, acc=0.710, loss=51.171, backward_time=0.398, grad_norm=27.752, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.093, optim0_lr0=3.184e-04, train_time=1.857 +[gpub078:0/16] 2024-01-25 19:56:39,312 (trainer:737) INFO: 10epoch:train:13001-13100batch: iter_time=8.667e-05, forward_time=0.312, loss_ctc=53.202, loss_att=51.313, acc=0.733, loss=51.880, backward_time=0.438, grad_norm=27.191, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.099, optim0_lr0=3.183e-04, train_time=2.166 +[gpub078:0/16] 2024-01-25 19:59:55,919 (trainer:737) INFO: 10epoch:train:13101-13200batch: iter_time=1.023e-04, forward_time=0.439, loss_ctc=55.637, loss_att=53.112, acc=0.710, loss=53.869, backward_time=0.441, grad_norm=29.735, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.119, optim0_lr0=3.182e-04, train_time=1.966 +[gpub078:0/16] 2024-01-25 20:03:19,448 (trainer:737) INFO: 10epoch:train:13201-13300batch: iter_time=1.034e-04, forward_time=0.319, loss_ctc=50.940, loss_att=54.038, acc=0.698, loss=53.109, backward_time=0.433, grad_norm=29.736, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.097, optim0_lr0=3.181e-04, train_time=2.035 +[gpub078:0/16] 2024-01-25 20:06:45,252 (trainer:737) INFO: 10epoch:train:13301-13400batch: iter_time=8.540e-05, forward_time=0.421, loss_ctc=63.442, loss_att=61.859, acc=0.689, loss=62.334, backward_time=0.427, grad_norm=37.237, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.143, optim0_lr0=3.180e-04, train_time=2.058 +[gpub078:0/16] 2024-01-25 20:10:33,823 (trainer:737) INFO: 
10epoch:train:13401-13500batch: iter_time=9.664e-05, forward_time=0.361, loss_ctc=62.729, loss_att=68.007, acc=0.680, loss=66.424, backward_time=0.422, grad_norm=36.040, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.111, optim0_lr0=3.179e-04, train_time=2.285 +[gpub078:0/16] 2024-01-25 20:14:19,436 (trainer:737) INFO: 10epoch:train:13501-13600batch: iter_time=9.868e-05, forward_time=0.445, loss_ctc=50.600, loss_att=40.083, acc=0.743, loss=43.238, backward_time=0.428, grad_norm=25.286, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.106, optim0_lr0=3.178e-04, train_time=2.256 +[gpub078:0/16] 2024-01-25 20:16:48,852 (trainer:737) INFO: 10epoch:train:13601-13700batch: iter_time=1.058e-04, forward_time=0.290, loss_ctc=59.491, loss_att=62.588, acc=0.693, loss=61.659, backward_time=0.401, grad_norm=32.938, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.177e-04, train_time=1.494 +[gpub078:0/16] 2024-01-25 20:18:53,488 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub078:0/16] 2024-01-25 20:19:13,486 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 20:19:17,487 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 20:19:17,487 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub078:0/16] 2024-01-25 20:19:17,490 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 20:27:46,565 (trainer:737) INFO: 10epoch:train:13701-13800batch: iter_time=4.512, forward_time=0.484, loss_ctc=55.194, loss_att=52.058, acc=0.721, loss=52.999, backward_time=0.469, grad_norm=29.315, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.098, optim0_lr0=3.176e-04, train_time=6.577 +[gpub078:0/16] 2024-01-25 20:30:21,954 (trainer:737) INFO: 10epoch:train:13801-13900batch: iter_time=8.455e-05, forward_time=0.286, loss_ctc=54.247, loss_att=51.280, acc=0.706, loss=52.170, backward_time=0.400, grad_norm=29.485, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.174e-04, train_time=1.553 +[gpub078:0/16] 2024-01-25 20:33:14,297 (trainer:737) INFO: 10epoch:train:13901-14000batch: iter_time=8.329e-05, forward_time=0.289, loss_ctc=49.262, loss_att=48.281, acc=0.716, loss=48.575, backward_time=0.399, grad_norm=27.793, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.173e-04, train_time=1.723 +[gpub078:0/16] 2024-01-25 20:36:45,822 (trainer:737) 
INFO: 10epoch:train:14001-14100batch: iter_time=8.861e-05, forward_time=0.489, loss_ctc=48.363, loss_att=49.344, acc=0.701, loss=49.050, backward_time=0.441, grad_norm=25.756, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.112, optim0_lr0=3.172e-04, train_time=2.115 +[gpub078:0/16] 2024-01-25 20:39:32,203 (trainer:737) INFO: 10epoch:train:14101-14200batch: iter_time=8.126e-05, forward_time=0.289, loss_ctc=54.895, loss_att=60.632, acc=0.703, loss=58.911, backward_time=0.403, grad_norm=30.389, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.171e-04, train_time=1.664 +[gpub078:0/16] 2024-01-25 20:42:30,829 (trainer:737) INFO: 10epoch:train:14201-14300batch: iter_time=8.280e-05, forward_time=0.289, loss_ctc=56.393, loss_att=57.286, acc=0.709, loss=57.018, backward_time=0.402, grad_norm=28.098, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.170e-04, train_time=1.785 +[gpub078:0/16] 2024-01-25 20:46:31,645 (trainer:737) INFO: 10epoch:train:14301-14400batch: iter_time=8.460e-05, forward_time=0.461, loss_ctc=55.192, loss_att=48.850, acc=0.715, loss=50.752, backward_time=0.471, grad_norm=29.273, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.110, optim0_lr0=3.169e-04, train_time=2.408 +[gpub078:0/16] 2024-01-25 20:48:51,906 (trainer:737) INFO: 10epoch:train:14401-14500batch: iter_time=8.933e-05, forward_time=0.286, loss_ctc=49.744, loss_att=48.827, acc=0.720, loss=49.102, backward_time=0.401, grad_norm=27.472, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.168e-04, train_time=1.403 +[gpub078:0/16] 2024-01-25 20:52:10,476 (trainer:737) INFO: 10epoch:train:14501-14600batch: iter_time=8.454e-05, forward_time=0.288, loss_ctc=56.770, loss_att=58.770, acc=0.691, loss=58.170, backward_time=0.401, grad_norm=29.349, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.167e-04, train_time=1.986 +[gpub078:0/16] 2024-01-25 20:54:37,903 (trainer:737) INFO: 
10epoch:train:14601-14700batch: iter_time=8.977e-05, forward_time=0.314, loss_ctc=64.402, loss_att=60.027, acc=0.699, loss=61.340, backward_time=0.434, grad_norm=34.816, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.097, optim0_lr0=3.166e-04, train_time=1.473 +[gpub078:0/16] 2024-01-25 20:57:52,265 (trainer:737) INFO: 10epoch:train:14701-14800batch: iter_time=9.391e-05, forward_time=0.378, loss_ctc=53.359, loss_att=57.618, acc=0.709, loss=56.341, backward_time=0.436, grad_norm=27.621, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.099, optim0_lr0=3.165e-04, train_time=1.944 +[gpub078:0/16] 2024-01-25 21:00:39,632 (trainer:737) INFO: 10epoch:train:14801-14900batch: iter_time=8.847e-05, forward_time=0.287, loss_ctc=56.753, loss_att=50.479, acc=0.713, loss=52.361, backward_time=0.399, grad_norm=31.144, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.164e-04, train_time=1.673 +[gpub078:0/16] 2024-01-25 21:03:21,412 (trainer:737) INFO: 10epoch:train:14901-15000batch: iter_time=8.427e-05, forward_time=0.320, loss_ctc=52.249, loss_att=51.862, acc=0.711, loss=51.978, backward_time=0.398, grad_norm=30.534, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.093, optim0_lr0=3.163e-04, train_time=1.618 +[gpub078:0/16] 2024-01-25 21:44:17,385 (trainer:343) INFO: 10epoch results: [train] iter_time=0.287, forward_time=0.333, loss_ctc=56.382, loss_att=55.655, acc=0.706, loss=55.873, backward_time=0.421, grad_norm=29.839, clip=100.000, loss_scale=3.282e+26, optim_step_time=0.097, optim0_lr0=3.246e-04, train_time=1.996, time=8 hours, 19 minutes and 36.89 seconds, total_count=180000, gpu_max_cached_mem_GB=42.420, [valid] loss_ctc=52.152, cer_ctc=0.263, loss_att=51.216, acc=0.607, cer=0.381, wer=0.998, loss=51.497, time=40 minutes and 29.1 seconds, total_count=56052, gpu_max_cached_mem_GB=42.420 +[gpub078:0/16] 2024-01-25 21:44:28,241 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub078:0/16] 2024-01-25 
21:44:28,350 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/5epoch.pth +[gpub078:0/16] 2024-01-25 21:44:28,350 (trainer:272) INFO: 11/45epoch started. Estimated time to finish: 1 week, 6 days and 13 hours +[gpub078:0/16] 2024-01-25 21:44:28,361 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub078:0/16] 2024-01-25 21:44:46,981 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 21:44:50,759 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 21:44:50,759 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub078:0/16] 2024-01-25 21:44:50,762 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 21:54:16,991 (trainer:737) INFO: 11epoch:train:1-100batch: iter_time=4.162, forward_time=0.297, loss_ctc=63.999, loss_att=55.517, acc=0.704, loss=58.061, backward_time=0.429, grad_norm=32.087, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.096, optim0_lr0=3.162e-04, train_time=5.886 +[gpub078:0/16] 2024-01-25 21:56:51,589 (trainer:737) INFO: 11epoch:train:101-200batch: iter_time=8.385e-05, forward_time=0.390, loss_ctc=73.361, loss_att=74.568, acc=0.650, loss=74.206, backward_time=0.410, grad_norm=54.219, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.100, optim0_lr0=3.161e-04, 
train_time=1.546 +[gpub078:0/16] 2024-01-25 21:59:14,978 (trainer:737) INFO: 11epoch:train:201-300batch: iter_time=8.818e-05, forward_time=0.353, loss_ctc=58.990, loss_att=52.724, acc=0.695, loss=54.604, backward_time=0.426, grad_norm=34.010, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.100, optim0_lr0=3.160e-04, train_time=1.433 +[gpub078:0/16] 2024-01-25 22:02:19,301 (trainer:737) INFO: 11epoch:train:301-400batch: iter_time=9.060e-05, forward_time=0.303, loss_ctc=60.356, loss_att=58.335, acc=0.700, loss=58.941, backward_time=0.425, grad_norm=32.827, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.094, optim0_lr0=3.159e-04, train_time=1.843 +[gpub078:0/16] 2024-01-25 22:04:58,437 (trainer:737) INFO: 11epoch:train:401-500batch: iter_time=8.918e-05, forward_time=0.289, loss_ctc=52.096, loss_att=51.648, acc=0.698, loss=51.783, backward_time=0.402, grad_norm=29.034, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.093, optim0_lr0=3.158e-04, train_time=1.591 +[gpub078:0/16] 2024-01-25 22:07:51,868 (trainer:737) INFO: 11epoch:train:501-600batch: iter_time=9.884e-05, forward_time=0.426, loss_ctc=58.693, loss_att=54.223, acc=0.715, loss=55.564, backward_time=0.429, grad_norm=30.631, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.110, optim0_lr0=3.156e-04, train_time=1.734 +[gpub078:0/16] 2024-01-25 22:10:32,652 (trainer:737) INFO: 11epoch:train:601-700batch: iter_time=9.766e-05, forward_time=0.301, loss_ctc=55.643, loss_att=56.293, acc=0.698, loss=56.098, backward_time=0.426, grad_norm=30.538, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.096, optim0_lr0=3.155e-04, train_time=1.608 +[gpub078:0/16] 2024-01-25 22:12:55,630 (trainer:737) INFO: 11epoch:train:701-800batch: iter_time=9.368e-05, forward_time=0.292, loss_ctc=55.735, loss_att=52.888, acc=0.685, loss=53.742, backward_time=0.405, grad_norm=30.136, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.095, optim0_lr0=3.154e-04, train_time=1.430 +[gpub078:0/16] 2024-01-25 
22:16:26,355 (trainer:737) INFO: 11epoch:train:801-900batch: iter_time=9.576e-05, forward_time=0.418, loss_ctc=63.970, loss_att=63.140, acc=0.701, loss=63.389, backward_time=0.455, grad_norm=34.882, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.103, optim0_lr0=3.153e-04, train_time=2.106 +[gpub078:0/16] 2024-01-25 22:18:52,160 (trainer:737) INFO: 11epoch:train:901-1000batch: iter_time=9.383e-05, forward_time=0.307, loss_ctc=67.886, loss_att=60.803, acc=0.708, loss=62.928, backward_time=0.431, grad_norm=35.862, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.095, optim0_lr0=3.152e-04, train_time=1.458 +[gpub078:0/16] 2024-01-25 22:21:47,881 (trainer:737) INFO: 11epoch:train:1001-1100batch: iter_time=9.946e-05, forward_time=0.295, loss_ctc=62.384, loss_att=60.018, acc=0.687, loss=60.728, backward_time=0.404, grad_norm=32.927, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.095, optim0_lr0=3.151e-04, train_time=1.758 +[gpub078:0/16] 2024-01-25 22:24:37,401 (trainer:737) INFO: 11epoch:train:1101-1200batch: iter_time=1.044e-04, forward_time=0.442, loss_ctc=61.521, loss_att=55.006, acc=0.695, loss=56.961, backward_time=0.437, grad_norm=32.427, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.118, optim0_lr0=3.150e-04, train_time=1.695 +[gpub078:0/16] 2024-01-25 22:26:05,930 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub078:0/16] 2024-01-25 22:26:25,007 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 22:26:28,600 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 22:26:28,600 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub078:0/16] 2024-01-25 22:26:28,603 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 22:35:01,446 (trainer:737) INFO: 11epoch:train:1201-1300batch: iter_time=4.608, forward_time=0.290, loss_ctc=57.822, loss_att=53.920, acc=0.689, loss=55.091, backward_time=0.403, grad_norm=31.545, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.093, optim0_lr0=3.149e-04, train_time=6.241 +[gpub078:0/16] 2024-01-25 22:37:37,446 (trainer:737) INFO: 11epoch:train:1301-1400batch: iter_time=7.834e-05, forward_time=0.296, loss_ctc=61.497, loss_att=61.371, acc=0.680, loss=61.409, backward_time=0.409, grad_norm=34.636, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.095, optim0_lr0=3.148e-04, train_time=1.560 +[gpub078:0/16] 2024-01-25 22:40:15,162 (trainer:737) INFO: 11epoch:train:1401-1500batch: iter_time=8.535e-05, forward_time=0.367, loss_ctc=61.433, loss_att=62.319, acc=0.680, loss=62.053, backward_time=0.407, grad_norm=44.408, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.099, optim0_lr0=3.147e-04, train_time=1.577 +[gpub078:0/16] 2024-01-25 22:42:35,929 (trainer:737) INFO: 
11epoch:train:1501-1600batch: iter_time=8.750e-05, forward_time=0.303, loss_ctc=64.336, loss_att=56.830, acc=0.689, loss=59.082, backward_time=0.421, grad_norm=34.100, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.097, optim0_lr0=3.146e-04, train_time=1.407 +[gpub078:0/16] 2024-01-25 22:45:47,339 (trainer:737) INFO: 11epoch:train:1601-1700batch: iter_time=8.340e-05, forward_time=0.305, loss_ctc=55.738, loss_att=53.968, acc=0.703, loss=54.499, backward_time=0.414, grad_norm=31.481, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.094, optim0_lr0=3.145e-04, train_time=1.914 +[gpub078:0/16] 2024-01-25 22:48:50,097 (trainer:737) INFO: 11epoch:train:1701-1800batch: iter_time=8.276e-05, forward_time=0.294, loss_ctc=56.537, loss_att=54.917, acc=0.711, loss=55.403, backward_time=0.407, grad_norm=29.294, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.094, optim0_lr0=3.144e-04, train_time=1.827 +[gpub078:0/16] 2024-01-25 22:51:36,509 (trainer:737) INFO: 11epoch:train:1801-1900batch: iter_time=0.002, forward_time=0.511, loss_ctc=54.292, loss_att=52.673, acc=0.708, loss=53.158, backward_time=0.425, grad_norm=28.916, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.104, optim0_lr0=3.143e-04, train_time=1.664 +[gpub078:0/16] 2024-01-25 22:53:59,669 (trainer:737) INFO: 11epoch:train:1901-2000batch: iter_time=8.069e-05, forward_time=0.291, loss_ctc=51.416, loss_att=51.593, acc=0.698, loss=51.540, backward_time=0.403, grad_norm=29.749, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.093, optim0_lr0=3.142e-04, train_time=1.431 +[gpub078:0/16] 2024-01-25 22:57:19,330 (trainer:737) INFO: 11epoch:train:2001-2100batch: iter_time=8.858e-05, forward_time=0.300, loss_ctc=63.999, loss_att=64.377, acc=0.697, loss=64.263, backward_time=0.419, grad_norm=32.170, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.095, optim0_lr0=3.141e-04, train_time=1.997 +[gpub078:0/16] 2024-01-25 23:00:12,157 (trainer:737) INFO: 11epoch:train:2101-2200batch: 
iter_time=8.521e-05, forward_time=0.525, loss_ctc=62.340, loss_att=53.368, acc=0.717, loss=56.060, backward_time=0.438, grad_norm=33.726, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.103, optim0_lr0=3.140e-04, train_time=1.727 +[gpub078:0/16] 2024-01-25 23:02:41,480 (trainer:737) INFO: 11epoch:train:2201-2300batch: iter_time=8.394e-05, forward_time=0.290, loss_ctc=63.179, loss_att=59.321, acc=0.694, loss=60.478, backward_time=0.401, grad_norm=33.750, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.139e-04, train_time=1.494 +[gpub078:0/16] 2024-01-25 23:05:03,201 (trainer:737) INFO: 11epoch:train:2301-2400batch: iter_time=9.304e-05, forward_time=0.292, loss_ctc=61.924, loss_att=56.124, acc=0.704, loss=57.864, backward_time=0.407, grad_norm=32.652, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.094, optim0_lr0=3.138e-04, train_time=1.416 +[gpub078:0/16] 2024-01-25 23:08:17,669 (trainer:737) INFO: 11epoch:train:2401-2500batch: iter_time=9.067e-05, forward_time=0.470, loss_ctc=54.896, loss_att=55.258, acc=0.672, loss=55.149, backward_time=0.424, grad_norm=33.291, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.099, optim0_lr0=3.137e-04, train_time=1.945 +[gpub078:0/16] 2024-01-25 23:08:37,870 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub078:0/16] 2024-01-25 23:08:56,893 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 23:09:00,425 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 23:09:00,425 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub078:0/16] 2024-01-25 23:09:00,428 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 23:17:58,019 (trainer:737) INFO: 11epoch:train:2501-2600batch: iter_time=4.294, forward_time=0.289, loss_ctc=62.113, loss_att=53.936, acc=0.711, loss=56.389, backward_time=0.402, grad_norm=31.873, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.136e-04, train_time=5.803 +[gpub078:0/16] 2024-01-25 23:20:23,467 (trainer:737) INFO: 11epoch:train:2601-2700batch: iter_time=8.754e-05, forward_time=0.291, loss_ctc=66.836, loss_att=71.534, acc=0.659, loss=70.125, backward_time=0.403, grad_norm=44.182, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.135e-04, train_time=1.454 +[gpub078:0/16] 2024-01-25 23:23:30,063 (trainer:737) INFO: 11epoch:train:2701-2800batch: iter_time=8.864e-05, forward_time=0.393, loss_ctc=56.932, loss_att=51.147, acc=0.703, loss=52.882, backward_time=0.496, grad_norm=31.944, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.104, optim0_lr0=3.134e-04, train_time=1.866 +[gpub078:0/16] 2024-01-25 23:26:11,337 (trainer:737) INFO: 
11epoch:train:2801-2900batch: iter_time=8.871e-05, forward_time=0.291, loss_ctc=57.993, loss_att=56.603, acc=0.708, loss=57.020, backward_time=0.404, grad_norm=30.670, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.133e-04, train_time=1.613 +[gpub078:0/16] 2024-01-25 23:28:45,710 (trainer:737) INFO: 11epoch:train:2901-3000batch: iter_time=9.462e-05, forward_time=0.291, loss_ctc=50.987, loss_att=50.990, acc=0.702, loss=50.989, backward_time=0.403, grad_norm=26.786, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.132e-04, train_time=1.544 +[gpub078:0/16] 2024-01-25 23:31:17,917 (trainer:737) INFO: 11epoch:train:3001-3100batch: iter_time=9.358e-05, forward_time=0.289, loss_ctc=56.546, loss_att=52.196, acc=0.722, loss=53.501, backward_time=0.402, grad_norm=29.612, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.131e-04, train_time=1.522 +[gpub078:0/16] 2024-01-25 23:34:20,961 (trainer:737) INFO: 11epoch:train:3101-3200batch: iter_time=1.018e-04, forward_time=0.466, loss_ctc=53.846, loss_att=55.026, acc=0.702, loss=54.672, backward_time=0.430, grad_norm=29.283, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.146, optim0_lr0=3.130e-04, train_time=1.830 +[gpub078:0/16] 2024-01-25 23:37:14,228 (trainer:737) INFO: 11epoch:train:3201-3300batch: iter_time=9.710e-05, forward_time=0.290, loss_ctc=53.347, loss_att=49.845, acc=0.696, loss=50.896, backward_time=0.399, grad_norm=29.303, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.129e-04, train_time=1.733 +[gpub078:0/16] 2024-01-25 23:39:50,771 (trainer:737) INFO: 11epoch:train:3301-3400batch: iter_time=9.308e-05, forward_time=0.293, loss_ctc=61.590, loss_att=60.899, acc=0.714, loss=61.106, backward_time=0.405, grad_norm=31.206, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.094, optim0_lr0=3.128e-04, train_time=1.565 +[gpub078:0/16] 2024-01-25 23:42:54,113 (trainer:737) INFO: 11epoch:train:3401-3500batch: 
iter_time=2.345e-04, forward_time=0.468, loss_ctc=64.494, loss_att=58.688, acc=0.715, loss=60.430, backward_time=0.488, grad_norm=32.136, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.102, optim0_lr0=3.127e-04, train_time=1.833 +[gpub078:0/16] 2024-01-25 23:45:28,801 (trainer:737) INFO: 11epoch:train:3501-3600batch: iter_time=9.321e-05, forward_time=0.289, loss_ctc=59.928, loss_att=58.090, acc=0.692, loss=58.641, backward_time=0.401, grad_norm=32.367, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.125e-04, train_time=1.546 +[gpub078:0/16] 2024-01-25 23:48:22,931 (trainer:737) INFO: 11epoch:train:3601-3700batch: iter_time=9.172e-05, forward_time=0.292, loss_ctc=58.371, loss_att=52.794, acc=0.703, loss=54.467, backward_time=0.399, grad_norm=31.589, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.124e-04, train_time=1.742 +[gpub078:0/16] 2024-01-25 23:49:52,815 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub078:0/16] 2024-01-25 23:50:12,025 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-25 23:50:15,677 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-25 23:50:15,677 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub078:0/16] 2024-01-25 23:50:15,680 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-25 23:58:36,581 
(trainer:737) INFO: 11epoch:train:3701-3800batch: iter_time=4.332, forward_time=0.290, loss_ctc=56.669, loss_att=53.127, acc=0.692, loss=54.190, backward_time=0.401, grad_norm=31.388, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.123e-04, train_time=6.136 +[gpub078:0/16] 2024-01-26 00:01:36,519 (trainer:737) INFO: 11epoch:train:3801-3900batch: iter_time=8.955e-05, forward_time=0.493, loss_ctc=59.983, loss_att=60.154, acc=0.683, loss=60.103, backward_time=0.457, grad_norm=32.578, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.124, optim0_lr0=3.122e-04, train_time=1.798 +[gpub078:0/16] 2024-01-26 00:04:00,279 (trainer:737) INFO: 11epoch:train:3901-4000batch: iter_time=8.289e-05, forward_time=0.289, loss_ctc=61.288, loss_att=61.371, acc=0.685, loss=61.346, backward_time=0.403, grad_norm=45.070, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.093, optim0_lr0=3.121e-04, train_time=1.438 +[gpub078:0/16] 2024-01-26 00:07:32,287 (trainer:737) INFO: 11epoch:train:4001-4100batch: iter_time=9.700e-05, forward_time=0.291, loss_ctc=62.440, loss_att=55.714, acc=0.693, loss=57.732, backward_time=0.402, grad_norm=31.508, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.120e-04, train_time=2.120 +[gpub078:0/16] 2024-01-26 00:09:46,747 (trainer:737) INFO: 11epoch:train:4101-4200batch: iter_time=1.015e-04, forward_time=0.290, loss_ctc=54.856, loss_att=53.282, acc=0.708, loss=53.754, backward_time=0.404, grad_norm=29.758, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.119e-04, train_time=1.344 +[gpub078:0/16] 2024-01-26 00:12:52,630 (trainer:737) INFO: 11epoch:train:4201-4300batch: iter_time=9.463e-05, forward_time=0.420, loss_ctc=54.590, loss_att=52.897, acc=0.717, loss=53.405, backward_time=0.486, grad_norm=27.128, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.108, optim0_lr0=3.118e-04, train_time=1.858 +[gpub078:0/16] 2024-01-26 00:15:40,720 (trainer:737) INFO: 
11epoch:train:4301-4400batch: iter_time=8.625e-05, forward_time=0.290, loss_ctc=53.278, loss_att=52.732, acc=0.710, loss=52.896, backward_time=0.402, grad_norm=28.127, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.117e-04, train_time=1.681 +[gpub078:0/16] 2024-01-26 00:18:19,815 (trainer:737) INFO: 11epoch:train:4401-4500batch: iter_time=7.939e-05, forward_time=0.323, loss_ctc=50.281, loss_att=50.985, acc=0.700, loss=50.774, backward_time=0.401, grad_norm=30.158, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.116e-04, train_time=1.591 +[gpub078:0/16] 2024-01-26 00:22:01,195 (trainer:737) INFO: 11epoch:train:4501-4600batch: iter_time=8.462e-05, forward_time=0.567, loss_ctc=63.029, loss_att=64.001, acc=0.701, loss=63.709, backward_time=0.446, grad_norm=31.619, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.123, optim0_lr0=3.115e-04, train_time=2.214 +[gpub078:0/16] 2024-01-26 00:24:19,790 (trainer:737) INFO: 11epoch:train:4601-4700batch: iter_time=8.948e-05, forward_time=0.290, loss_ctc=61.180, loss_att=52.502, acc=0.721, loss=55.105, backward_time=0.405, grad_norm=34.266, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.114e-04, train_time=1.385 +[gpub078:0/16] 2024-01-26 00:28:28,545 (trainer:737) INFO: 11epoch:train:4701-4800batch: iter_time=1.042e-04, forward_time=0.291, loss_ctc=60.939, loss_att=58.469, acc=0.697, loss=59.210, backward_time=0.404, grad_norm=34.370, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.113e-04, train_time=2.487 +[gpub078:0/16] 2024-01-26 00:30:46,234 (trainer:737) INFO: 11epoch:train:4801-4900batch: iter_time=9.618e-05, forward_time=0.292, loss_ctc=60.108, loss_att=54.804, acc=0.707, loss=56.396, backward_time=0.404, grad_norm=31.107, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.112e-04, train_time=1.378 +[gpub078:0/16] 2024-01-26 00:33:36,560 (trainer:737) INFO: 11epoch:train:4901-5000batch: 
iter_time=9.102e-05, forward_time=0.460, loss_ctc=54.315, loss_att=54.426, acc=0.675, loss=54.392, backward_time=0.539, grad_norm=31.947, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.103, optim0_lr0=3.111e-04, train_time=1.703 +[gpub078:0/16] 2024-01-26 00:33:56,588 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub078:0/16] 2024-01-26 00:34:15,732 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 00:34:19,278 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 00:34:19,278 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub078:0/16] 2024-01-26 00:34:19,281 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 00:46:11,947 (trainer:737) INFO: 11epoch:train:5001-5100batch: iter_time=5.492, forward_time=0.459, loss_ctc=61.287, loss_att=55.473, acc=0.722, loss=57.217, backward_time=0.529, grad_norm=33.431, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.111, optim0_lr0=3.110e-04, train_time=7.554 +[gpub078:0/16] 2024-01-26 00:49:04,577 (trainer:737) INFO: 11epoch:train:5101-5200batch: iter_time=8.455e-05, forward_time=0.292, loss_ctc=63.932, loss_att=74.844, acc=0.662, loss=71.570, backward_time=0.405, grad_norm=41.003, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.109e-04, train_time=1.725 +[gpub078:0/16] 2024-01-26 00:52:40,335 
(trainer:737) INFO: 11epoch:train:5201-5300batch: iter_time=8.361e-05, forward_time=0.521, loss_ctc=55.875, loss_att=53.128, acc=0.709, loss=53.952, backward_time=0.444, grad_norm=32.203, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.146, optim0_lr0=3.108e-04, train_time=2.158 +[gpub078:0/16] 2024-01-26 00:55:34,954 (trainer:737) INFO: 11epoch:train:5301-5400batch: iter_time=8.676e-05, forward_time=0.340, loss_ctc=56.750, loss_att=58.482, acc=0.710, loss=57.963, backward_time=0.405, grad_norm=31.576, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.093, optim0_lr0=3.107e-04, train_time=1.746 +[gpub078:0/16] 2024-01-26 00:59:07,215 (trainer:737) INFO: 11epoch:train:5401-5500batch: iter_time=6.003e-04, forward_time=0.479, loss_ctc=50.228, loss_att=53.094, acc=0.713, loss=52.234, backward_time=0.450, grad_norm=26.553, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.106, optim0_lr0=3.106e-04, train_time=2.123 +[gpub078:0/16] 2024-01-26 01:02:01,684 (trainer:737) INFO: 11epoch:train:5501-5600batch: iter_time=9.155e-05, forward_time=0.292, loss_ctc=56.230, loss_att=56.216, acc=0.726, loss=56.220, backward_time=0.414, grad_norm=28.865, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.096, optim0_lr0=3.105e-04, train_time=1.744 +[gpub078:0/16] 2024-01-26 01:06:28,862 (trainer:737) INFO: 11epoch:train:5601-5700batch: iter_time=8.756e-05, forward_time=0.582, loss_ctc=52.930, loss_att=54.960, acc=0.706, loss=54.351, backward_time=0.453, grad_norm=28.857, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.098, optim0_lr0=3.104e-04, train_time=2.672 +[gpub078:0/16] 2024-01-26 01:08:52,416 (trainer:737) INFO: 11epoch:train:5701-5800batch: iter_time=6.102e-04, forward_time=0.345, loss_ctc=52.309, loss_att=50.099, acc=0.703, loss=50.762, backward_time=0.413, grad_norm=28.851, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.096, optim0_lr0=3.103e-04, train_time=1.434 +[gpub078:0/16] 2024-01-26 01:13:05,684 (trainer:737) INFO: 
11epoch:train:5801-5900batch: iter_time=0.023, forward_time=0.522, loss_ctc=60.238, loss_att=61.913, acc=0.716, loss=61.411, backward_time=0.435, grad_norm=31.501, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.095, optim0_lr0=3.102e-04, train_time=2.532 +[gpub078:0/16] 2024-01-26 01:16:19,055 (trainer:737) INFO: 11epoch:train:5901-6000batch: iter_time=3.281e-04, forward_time=0.472, loss_ctc=62.998, loss_att=59.026, acc=0.720, loss=60.217, backward_time=0.479, grad_norm=33.204, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.116, optim0_lr0=3.101e-04, train_time=1.934 +[gpub078:0/16] 2024-01-26 01:19:20,138 (trainer:737) INFO: 11epoch:train:6001-6100batch: iter_time=8.685e-05, forward_time=0.307, loss_ctc=58.904, loss_att=59.876, acc=0.698, loss=59.584, backward_time=0.403, grad_norm=32.602, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.095, optim0_lr0=3.100e-04, train_time=1.811 +[gpub078:0/16] 2024-01-26 01:22:39,344 (trainer:737) INFO: 11epoch:train:6101-6200batch: iter_time=2.155e-04, forward_time=0.548, loss_ctc=57.966, loss_att=53.135, acc=0.717, loss=54.584, backward_time=0.443, grad_norm=31.845, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.097, optim0_lr0=3.099e-04, train_time=1.992 +[gpub078:0/16] 2024-01-26 01:24:55,036 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub078:0/16] 2024-01-26 01:25:13,891 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 01:25:17,835 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 01:25:17,835 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub078:0/16] 2024-01-26 01:25:18,064 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 01:35:22,225 (trainer:737) INFO: 11epoch:train:6201-6300batch: iter_time=5.653, forward_time=0.385, loss_ctc=55.155, loss_att=53.000, acc=0.709, loss=53.647, backward_time=0.420, grad_norm=29.917, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.097, optim0_lr0=3.098e-04, train_time=7.629 +[gpub078:0/16] 2024-01-26 01:37:58,460 (trainer:737) INFO: 11epoch:train:6301-6400batch: iter_time=1.032e-04, forward_time=0.313, loss_ctc=59.904, loss_att=61.556, acc=0.692, loss=61.060, backward_time=0.407, grad_norm=32.064, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.094, optim0_lr0=3.097e-04, train_time=1.562 +[gpub078:0/16] 2024-01-26 01:40:44,883 (trainer:737) INFO: 11epoch:train:6401-6500batch: iter_time=9.958e-04, forward_time=0.443, loss_ctc=57.734, loss_att=60.156, acc=0.694, loss=59.430, backward_time=0.475, grad_norm=44.002, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.100, optim0_lr0=3.096e-04, train_time=1.664 +[gpub078:0/16] 2024-01-26 01:43:52,058 (trainer:737) INFO: 
11epoch:train:6501-6600batch: iter_time=9.971e-05, forward_time=0.300, loss_ctc=61.967, loss_att=57.632, acc=0.703, loss=58.933, backward_time=0.418, grad_norm=32.218, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.095e-04, train_time=1.872 +[gpub078:0/16] 2024-01-26 01:46:27,360 (trainer:737) INFO: 11epoch:train:6601-6700batch: iter_time=1.040e-04, forward_time=0.290, loss_ctc=54.439, loss_att=53.746, acc=0.716, loss=53.954, backward_time=0.405, grad_norm=28.697, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.094, optim0_lr0=3.094e-04, train_time=1.553 +[gpub078:0/16] 2024-01-26 01:49:14,686 (trainer:737) INFO: 11epoch:train:6701-6800batch: iter_time=2.093e-04, forward_time=0.488, loss_ctc=53.870, loss_att=53.746, acc=0.729, loss=53.783, backward_time=0.440, grad_norm=26.484, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.138, optim0_lr0=3.093e-04, train_time=1.673 +[gpub078:0/16] 2024-01-26 01:52:17,015 (trainer:737) INFO: 11epoch:train:6801-6900batch: iter_time=9.776e-05, forward_time=0.287, loss_ctc=52.117, loss_att=54.879, acc=0.715, loss=54.050, backward_time=0.402, grad_norm=28.529, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.092e-04, train_time=1.823 +[gpub078:0/16] 2024-01-26 01:55:01,951 (trainer:737) INFO: 11epoch:train:6901-7000batch: iter_time=9.816e-05, forward_time=0.309, loss_ctc=50.115, loss_att=51.221, acc=0.704, loss=50.889, backward_time=0.407, grad_norm=30.102, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.091e-04, train_time=1.649 +[gpub078:0/16] 2024-01-26 01:58:27,364 (trainer:737) INFO: 11epoch:train:7001-7100batch: iter_time=0.001, forward_time=0.440, loss_ctc=62.268, loss_att=63.217, acc=0.712, loss=62.932, backward_time=0.440, grad_norm=29.874, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.121, optim0_lr0=3.090e-04, train_time=2.054 +[gpub078:0/16] 2024-01-26 02:00:47,046 (trainer:737) INFO: 11epoch:train:7101-7200batch: 
iter_time=8.859e-05, forward_time=0.312, loss_ctc=60.067, loss_att=53.705, acc=0.726, loss=55.614, backward_time=0.407, grad_norm=32.219, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.089e-04, train_time=1.396 +[gpub078:0/16] 2024-01-26 02:03:02,510 (trainer:737) INFO: 11epoch:train:7201-7300batch: iter_time=9.517e-05, forward_time=0.293, loss_ctc=59.461, loss_att=58.507, acc=0.704, loss=58.793, backward_time=0.405, grad_norm=33.501, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.089e-04, train_time=1.355 +[gpub078:0/16] 2024-01-26 02:06:38,118 (trainer:737) INFO: 11epoch:train:7301-7400batch: iter_time=1.033e-04, forward_time=0.373, loss_ctc=59.761, loss_att=56.362, acc=0.716, loss=57.382, backward_time=0.498, grad_norm=32.260, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.101, optim0_lr0=3.088e-04, train_time=2.156 +[gpub078:0/16] 2024-01-26 02:08:50,019 (trainer:737) INFO: 11epoch:train:7401-7500batch: iter_time=8.462e-05, forward_time=0.289, loss_ctc=52.803, loss_att=54.994, acc=0.689, loss=54.337, backward_time=0.402, grad_norm=30.225, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.087e-04, train_time=1.319 +[gpub078:0/16] 2024-01-26 02:09:10,047 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub078:0/16] 2024-01-26 02:09:28,783 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 02:09:32,628 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 02:09:32,628 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub078:0/16] 2024-01-26 02:09:32,631 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 02:18:07,798 (trainer:737) INFO: 11epoch:train:7501-7600batch: iter_time=4.133, forward_time=0.308, loss_ctc=60.826, loss_att=52.968, acc=0.726, loss=55.325, backward_time=0.408, grad_norm=30.856, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.086e-04, train_time=5.577 +[gpub078:0/16] 2024-01-26 02:21:55,929 (trainer:737) INFO: 11epoch:train:7601-7700batch: iter_time=8.243e-05, forward_time=0.354, loss_ctc=65.314, loss_att=71.765, acc=0.669, loss=69.830, backward_time=0.515, grad_norm=41.282, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.103, optim0_lr0=3.085e-04, train_time=2.281 +[gpub078:0/16] 2024-01-26 02:24:10,999 (trainer:737) INFO: 11epoch:train:7701-7800batch: iter_time=9.068e-05, forward_time=0.310, loss_ctc=54.961, loss_att=51.265, acc=0.713, loss=52.374, backward_time=0.405, grad_norm=29.746, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.093, optim0_lr0=3.084e-04, train_time=1.351 +[gpub078:0/16] 2024-01-26 02:26:34,562 (trainer:737) INFO: 
11epoch:train:7801-7900batch: iter_time=9.499e-05, forward_time=0.309, loss_ctc=55.901, loss_att=55.730, acc=0.721, loss=55.781, backward_time=0.413, grad_norm=30.192, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.103, optim0_lr0=3.083e-04, train_time=1.435 +[gpub078:0/16] 2024-01-26 02:29:32,364 (trainer:737) INFO: 11epoch:train:7901-8000batch: iter_time=9.721e-05, forward_time=0.344, loss_ctc=50.062, loss_att=50.725, acc=0.721, loss=50.526, backward_time=0.466, grad_norm=27.078, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.096, optim0_lr0=3.082e-04, train_time=1.778 +[gpub078:0/16] 2024-01-26 02:32:30,499 (trainer:737) INFO: 11epoch:train:8001-8100batch: iter_time=9.297e-05, forward_time=0.313, loss_ctc=55.852, loss_att=55.511, acc=0.726, loss=55.614, backward_time=0.407, grad_norm=29.642, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.081e-04, train_time=1.782 +[gpub078:0/16] 2024-01-26 02:34:43,018 (trainer:737) INFO: 11epoch:train:8101-8200batch: iter_time=9.266e-05, forward_time=0.290, loss_ctc=52.582, loss_att=54.557, acc=0.708, loss=53.965, backward_time=0.403, grad_norm=29.325, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.080e-04, train_time=1.325 +[gpub078:0/16] 2024-01-26 02:37:57,053 (trainer:737) INFO: 11epoch:train:8201-8300batch: iter_time=9.039e-05, forward_time=0.360, loss_ctc=52.426, loss_att=49.462, acc=0.707, loss=50.351, backward_time=0.541, grad_norm=30.153, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.106, optim0_lr0=3.079e-04, train_time=1.940 +[gpub078:0/16] 2024-01-26 02:41:06,305 (trainer:737) INFO: 11epoch:train:8301-8400batch: iter_time=9.295e-05, forward_time=0.308, loss_ctc=59.650, loss_att=61.414, acc=0.719, loss=60.885, backward_time=0.413, grad_norm=31.910, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.078e-04, train_time=1.892 +[gpub078:0/16] 2024-01-26 02:43:45,539 (trainer:737) INFO: 11epoch:train:8401-8500batch: 
iter_time=9.564e-05, forward_time=0.478, loss_ctc=62.648, loss_att=57.748, acc=0.726, loss=59.218, backward_time=0.434, grad_norm=31.731, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.098, optim0_lr0=3.077e-04, train_time=1.593 +[gpub078:0/16] 2024-01-26 02:46:26,625 (trainer:737) INFO: 11epoch:train:8501-8600batch: iter_time=8.512e-05, forward_time=0.294, loss_ctc=58.216, loss_att=59.085, acc=0.700, loss=58.824, backward_time=0.403, grad_norm=32.924, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.076e-04, train_time=1.609 +[gpub078:0/16] 2024-01-26 02:49:11,055 (trainer:737) INFO: 11epoch:train:8601-8700batch: iter_time=9.348e-05, forward_time=0.312, loss_ctc=56.984, loss_att=52.107, acc=0.720, loss=53.570, backward_time=0.414, grad_norm=32.754, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.075e-04, train_time=1.645 +[gpub078:0/16] 2024-01-26 02:50:55,326 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub078:0/16] 2024-01-26 02:51:14,692 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 02:51:18,359 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 02:51:18,359 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub078:0/16] 2024-01-26 02:51:18,362 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 02:59:00,306 
(trainer:737) INFO: 11epoch:train:8701-8800batch: iter_time=4.261, forward_time=0.378, loss_ctc=54.978, loss_att=52.571, acc=0.709, loss=53.293, backward_time=0.421, grad_norm=28.401, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.094, optim0_lr0=3.074e-04, train_time=5.893 +[gpub078:0/16] 2024-01-26 03:01:24,892 (trainer:737) INFO: 11epoch:train:8801-8900batch: iter_time=7.798e-05, forward_time=0.290, loss_ctc=58.909, loss_att=60.049, acc=0.696, loss=59.707, backward_time=0.405, grad_norm=32.924, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.073e-04, train_time=1.445 +[gpub078:0/16] 2024-01-26 03:04:17,780 (trainer:737) INFO: 11epoch:train:8901-9000batch: iter_time=8.804e-05, forward_time=0.302, loss_ctc=56.280, loss_att=59.451, acc=0.698, loss=58.500, backward_time=0.407, grad_norm=39.910, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.072e-04, train_time=1.729 +[gpub078:0/16] 2024-01-26 03:07:22,059 (trainer:737) INFO: 11epoch:train:9001-9100batch: iter_time=0.003, forward_time=0.391, loss_ctc=60.868, loss_att=56.688, acc=0.706, loss=57.942, backward_time=0.447, grad_norm=31.654, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.103, optim0_lr0=3.071e-04, train_time=1.843 +[gpub078:0/16] 2024-01-26 03:09:36,588 (trainer:737) INFO: 11epoch:train:9101-9200batch: iter_time=9.581e-05, forward_time=0.291, loss_ctc=54.348, loss_att=53.165, acc=0.717, loss=53.520, backward_time=0.404, grad_norm=36.381, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.070e-04, train_time=1.345 +[gpub078:0/16] 2024-01-26 03:12:26,590 (trainer:737) INFO: 11epoch:train:9201-9300batch: iter_time=1.182e-04, forward_time=0.311, loss_ctc=53.932, loss_att=53.080, acc=0.731, loss=53.336, backward_time=0.409, grad_norm=26.450, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.069e-04, train_time=1.699 +[gpub078:0/16] 2024-01-26 03:15:16,984 (trainer:737) INFO: 11epoch:train:9301-9400batch: 
iter_time=1.075e-04, forward_time=0.393, loss_ctc=52.391, loss_att=55.155, acc=0.716, loss=54.326, backward_time=0.413, grad_norm=28.547, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.107, optim0_lr0=3.068e-04, train_time=1.704 +[gpub078:0/16] 2024-01-26 03:17:37,004 (trainer:737) INFO: 11epoch:train:9401-9500batch: iter_time=9.730e-05, forward_time=0.331, loss_ctc=49.447, loss_att=50.369, acc=0.709, loss=50.092, backward_time=0.415, grad_norm=28.386, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.067e-04, train_time=1.400 +[gpub078:0/16] 2024-01-26 03:20:21,279 (trainer:737) INFO: 11epoch:train:9501-9600batch: iter_time=9.181e-05, forward_time=0.308, loss_ctc=61.857, loss_att=62.623, acc=0.716, loss=62.393, backward_time=0.413, grad_norm=31.695, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.094, optim0_lr0=3.066e-04, train_time=1.642 +[gpub078:0/16] 2024-01-26 03:23:31,658 (trainer:737) INFO: 11epoch:train:9601-9700batch: iter_time=9.876e-05, forward_time=0.333, loss_ctc=59.273, loss_att=52.677, acc=0.729, loss=54.656, backward_time=0.463, grad_norm=33.061, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.106, optim0_lr0=3.065e-04, train_time=1.904 +[gpub078:0/16] 2024-01-26 03:25:50,567 (trainer:737) INFO: 11epoch:train:9701-9800batch: iter_time=9.816e-05, forward_time=0.331, loss_ctc=59.278, loss_att=57.788, acc=0.705, loss=58.235, backward_time=0.417, grad_norm=33.424, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.064e-04, train_time=1.389 +[gpub078:0/16] 2024-01-26 03:28:20,543 (trainer:737) INFO: 11epoch:train:9801-9900batch: iter_time=9.027e-05, forward_time=0.316, loss_ctc=59.065, loss_att=55.901, acc=0.720, loss=56.850, backward_time=0.411, grad_norm=29.954, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.093, optim0_lr0=3.063e-04, train_time=1.499 +[gpub078:0/16] 2024-01-26 03:31:08,072 (trainer:737) INFO: 11epoch:train:9901-10000batch: iter_time=8.729e-05, forward_time=0.340, 
loss_ctc=52.719, loss_att=54.411, acc=0.691, loss=53.904, backward_time=0.411, grad_norm=29.552, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.095, optim0_lr0=3.062e-04, train_time=1.675 +[gpub078:0/16] 2024-01-26 03:31:28,433 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub078:0/16] 2024-01-26 03:31:47,710 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 03:31:51,347 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 03:31:51,347 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub078:0/16] 2024-01-26 03:31:51,350 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 03:40:35,607 (trainer:737) INFO: 11epoch:train:10001-10100batch: iter_time=3.967, forward_time=0.369, loss_ctc=60.760, loss_att=52.385, acc=0.730, loss=54.897, backward_time=0.418, grad_norm=30.634, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.096, optim0_lr0=3.061e-04, train_time=5.675 +[gpub078:0/16] 2024-01-26 03:43:09,827 (trainer:737) INFO: 11epoch:train:10101-10200batch: iter_time=8.664e-05, forward_time=0.324, loss_ctc=61.808, loss_att=70.731, acc=0.674, loss=68.054, backward_time=0.417, grad_norm=40.193, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.101, optim0_lr0=3.060e-04, train_time=1.542 +[gpub078:0/16] 2024-01-26 03:45:46,408 (trainer:737) INFO: 
11epoch:train:10201-10300batch: iter_time=1.146e-04, forward_time=0.386, loss_ctc=54.946, loss_att=51.598, acc=0.713, loss=52.603, backward_time=0.413, grad_norm=31.191, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.097, optim0_lr0=3.059e-04, train_time=1.565 +[gpub078:0/16] 2024-01-26 03:48:31,675 (trainer:737) INFO: 11epoch:train:10301-10400batch: iter_time=9.296e-05, forward_time=0.315, loss_ctc=55.932, loss_att=56.122, acc=0.720, loss=56.065, backward_time=0.409, grad_norm=29.567, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.059e-04, train_time=1.652 +[gpub078:0/16] 2024-01-26 03:51:11,800 (trainer:737) INFO: 11epoch:train:10401-10500batch: iter_time=9.385e-05, forward_time=0.362, loss_ctc=49.719, loss_att=50.587, acc=0.721, loss=50.326, backward_time=0.482, grad_norm=27.469, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.102, optim0_lr0=3.058e-04, train_time=1.601 +[gpub078:0/16] 2024-01-26 03:54:04,442 (trainer:737) INFO: 11epoch:train:10501-10600batch: iter_time=9.022e-05, forward_time=0.292, loss_ctc=55.620, loss_att=54.630, acc=0.732, loss=54.927, backward_time=0.403, grad_norm=29.133, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.092, optim0_lr0=3.057e-04, train_time=1.727 +[gpub078:0/16] 2024-01-26 03:57:16,773 (trainer:737) INFO: 11epoch:train:10601-10700batch: iter_time=9.613e-05, forward_time=0.463, loss_ctc=52.514, loss_att=54.136, acc=0.710, loss=53.649, backward_time=0.456, grad_norm=29.087, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.106, optim0_lr0=3.056e-04, train_time=1.923 +[gpub078:0/16] 2024-01-26 03:59:45,263 (trainer:737) INFO: 11epoch:train:10701-10800batch: iter_time=1.157e-04, forward_time=0.289, loss_ctc=51.562, loss_att=48.929, acc=0.709, loss=49.719, backward_time=0.402, grad_norm=29.927, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.055e-04, train_time=1.485 +[gpub078:0/16] 2024-01-26 04:02:35,967 (trainer:737) INFO: 11epoch:train:10801-10900batch: 
iter_time=1.145e-04, forward_time=0.292, loss_ctc=59.433, loss_att=61.093, acc=0.720, loss=60.595, backward_time=0.404, grad_norm=31.037, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.054e-04, train_time=1.707 +[gpub078:0/16] 2024-01-26 04:05:39,542 (trainer:737) INFO: 11epoch:train:10901-11000batch: iter_time=1.009e-04, forward_time=0.430, loss_ctc=62.158, loss_att=57.542, acc=0.726, loss=58.927, backward_time=0.442, grad_norm=30.836, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.109, optim0_lr0=3.053e-04, train_time=1.835 +[gpub078:0/16] 2024-01-26 04:08:05,093 (trainer:737) INFO: 11epoch:train:11001-11100batch: iter_time=1.034e-04, forward_time=0.290, loss_ctc=57.336, loss_att=57.630, acc=0.703, loss=57.542, backward_time=0.403, grad_norm=31.775, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.052e-04, train_time=1.455 +[gpub078:0/16] 2024-01-26 04:10:51,626 (trainer:737) INFO: 11epoch:train:11101-11200batch: iter_time=1.019e-04, forward_time=0.289, loss_ctc=56.156, loss_att=51.479, acc=0.723, loss=52.882, backward_time=0.401, grad_norm=29.889, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.051e-04, train_time=1.665 +[gpub078:0/16] 2024-01-26 04:12:50,422 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub078:0/16] 2024-01-26 04:13:09,259 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 04:13:12,883 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 04:13:12,883 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub078:0/16] 2024-01-26 04:13:12,908 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 04:20:45,921 (trainer:737) INFO: 11epoch:train:11201-11300batch: iter_time=4.074, forward_time=0.396, loss_ctc=54.981, loss_att=51.566, acc=0.718, loss=52.590, backward_time=0.563, grad_norm=30.201, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.107, optim0_lr0=3.050e-04, train_time=5.943 +[gpub078:0/16] 2024-01-26 04:23:26,871 (trainer:737) INFO: 11epoch:train:11301-11400batch: iter_time=1.136e-04, forward_time=0.292, loss_ctc=58.254, loss_att=59.869, acc=0.699, loss=59.384, backward_time=0.403, grad_norm=31.111, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.049e-04, train_time=1.610 +[gpub078:0/16] 2024-01-26 04:26:54,146 (trainer:737) INFO: 11epoch:train:11401-11500batch: iter_time=8.994e-05, forward_time=0.381, loss_ctc=58.221, loss_att=58.789, acc=0.700, loss=58.618, backward_time=0.597, grad_norm=44.655, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.102, optim0_lr0=3.048e-04, train_time=2.072 +[gpub078:0/16] 2024-01-26 04:29:13,590 
(trainer:737) INFO: 11epoch:train:11501-11600batch: iter_time=8.390e-05, forward_time=0.294, loss_ctc=60.804, loss_att=57.331, acc=0.703, loss=58.373, backward_time=0.405, grad_norm=31.671, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.047e-04, train_time=1.394 +[gpub078:0/16] 2024-01-26 04:31:49,570 (trainer:737) INFO: 11epoch:train:11601-11700batch: iter_time=8.259e-05, forward_time=0.291, loss_ctc=53.755, loss_att=53.111, acc=0.716, loss=53.304, backward_time=0.404, grad_norm=28.279, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.046e-04, train_time=1.560 +[gpub078:0/16] 2024-01-26 04:35:05,943 (trainer:737) INFO: 11epoch:train:11701-11800batch: iter_time=8.797e-05, forward_time=0.445, loss_ctc=53.524, loss_att=52.704, acc=0.733, loss=52.950, backward_time=0.448, grad_norm=26.358, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.099, optim0_lr0=3.045e-04, train_time=1.963 +[gpub078:0/16] 2024-01-26 04:37:20,932 (trainer:737) INFO: 11epoch:train:11801-11900batch: iter_time=8.405e-05, forward_time=0.291, loss_ctc=52.202, loss_att=53.942, acc=0.721, loss=53.420, backward_time=0.404, grad_norm=28.868, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.093, optim0_lr0=3.044e-04, train_time=1.350 +[gpub078:0/16] 2024-01-26 04:40:34,864 (trainer:737) INFO: 11epoch:train:11901-12000batch: iter_time=0.004, forward_time=0.468, loss_ctc=48.682, loss_att=50.291, acc=0.708, loss=49.808, backward_time=0.428, grad_norm=28.748, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.104, optim0_lr0=3.043e-04, train_time=1.937 +[gpub078:0/16] 2024-01-26 04:43:22,267 (trainer:737) INFO: 11epoch:train:12001-12100batch: iter_time=8.957e-05, forward_time=0.292, loss_ctc=60.967, loss_att=61.901, acc=0.716, loss=61.621, backward_time=0.405, grad_norm=30.037, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.042e-04, train_time=1.674 +[gpub078:0/16] 2024-01-26 04:45:38,041 (trainer:737) INFO: 
11epoch:train:12101-12200batch: iter_time=8.445e-05, forward_time=0.291, loss_ctc=58.304, loss_att=51.618, acc=0.733, loss=53.624, backward_time=0.404, grad_norm=31.813, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.041e-04, train_time=1.358 +[gpub078:0/16] 2024-01-26 04:48:57,810 (trainer:737) INFO: 11epoch:train:12201-12300batch: iter_time=9.048e-05, forward_time=0.481, loss_ctc=57.871, loss_att=57.169, acc=0.707, loss=57.379, backward_time=0.438, grad_norm=31.113, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.100, optim0_lr0=3.041e-04, train_time=1.997 +[gpub078:0/16] 2024-01-26 04:51:42,585 (trainer:737) INFO: 11epoch:train:12301-12400batch: iter_time=8.880e-05, forward_time=0.291, loss_ctc=57.979, loss_att=55.383, acc=0.722, loss=56.161, backward_time=0.403, grad_norm=30.513, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.040e-04, train_time=1.648 +[gpub078:0/16] 2024-01-26 04:54:25,571 (trainer:737) INFO: 11epoch:train:12401-12500batch: iter_time=8.603e-05, forward_time=0.414, loss_ctc=51.946, loss_att=53.606, acc=0.698, loss=53.108, backward_time=0.437, grad_norm=29.724, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.109, optim0_lr0=3.039e-04, train_time=1.630 +[gpub078:0/16] 2024-01-26 04:54:45,599 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub078:0/16] 2024-01-26 04:55:04,892 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 04:55:08,445 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 04:55:08,445 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub078:0/16] 2024-01-26 04:55:08,449 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 05:07:49,522 (trainer:737) INFO: 11epoch:train:12501-12600batch: iter_time=3.838, forward_time=0.296, loss_ctc=60.396, loss_att=55.045, acc=0.713, loss=56.651, backward_time=0.403, grad_norm=30.854, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.038e-04, train_time=8.039 +[gpub078:0/16] 2024-01-26 05:10:52,609 (trainer:737) INFO: 11epoch:train:12601-12700batch: iter_time=8.000e-05, forward_time=0.335, loss_ctc=62.120, loss_att=72.446, acc=0.663, loss=69.349, backward_time=0.424, grad_norm=39.670, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.098, optim0_lr0=3.037e-04, train_time=1.831 +[gpub078:0/16] 2024-01-26 05:13:45,934 (trainer:737) INFO: 11epoch:train:12701-12800batch: iter_time=8.376e-05, forward_time=0.397, loss_ctc=54.386, loss_att=50.516, acc=0.709, loss=51.677, backward_time=0.424, grad_norm=31.440, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.097, optim0_lr0=3.036e-04, train_time=1.733 +[gpub078:0/16] 2024-01-26 05:16:33,420 (trainer:737) 
INFO: 11epoch:train:12801-12900batch: iter_time=8.769e-05, forward_time=0.292, loss_ctc=55.657, loss_att=55.897, acc=0.714, loss=55.825, backward_time=0.403, grad_norm=32.036, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.035e-04, train_time=1.674 +[gpub078:0/16] 2024-01-26 05:19:13,738 (trainer:737) INFO: 11epoch:train:12901-13000batch: iter_time=9.309e-05, forward_time=0.343, loss_ctc=49.526, loss_att=51.684, acc=0.704, loss=51.037, backward_time=0.432, grad_norm=27.943, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.099, optim0_lr0=3.034e-04, train_time=1.603 +[gpub078:0/16] 2024-01-26 05:22:14,153 (trainer:737) INFO: 11epoch:train:13001-13100batch: iter_time=9.512e-05, forward_time=0.360, loss_ctc=55.198, loss_att=51.755, acc=0.726, loss=52.788, backward_time=0.408, grad_norm=30.590, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.097, optim0_lr0=3.033e-04, train_time=1.804 +[gpub078:0/16] 2024-01-26 05:24:40,383 (trainer:737) INFO: 11epoch:train:13101-13200batch: iter_time=9.680e-05, forward_time=0.332, loss_ctc=51.946, loss_att=54.443, acc=0.708, loss=53.694, backward_time=0.418, grad_norm=28.905, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.106, optim0_lr0=3.032e-04, train_time=1.462 +[gpub078:0/16] 2024-01-26 05:27:36,125 (trainer:737) INFO: 11epoch:train:13201-13300batch: iter_time=9.444e-05, forward_time=0.359, loss_ctc=50.640, loss_att=48.869, acc=0.703, loss=49.400, backward_time=0.409, grad_norm=29.031, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.102, optim0_lr0=3.031e-04, train_time=1.758 +[gpub078:0/16] 2024-01-26 05:30:25,569 (trainer:737) INFO: 11epoch:train:13301-13400batch: iter_time=9.187e-05, forward_time=0.359, loss_ctc=59.076, loss_att=59.274, acc=0.720, loss=59.214, backward_time=0.419, grad_norm=32.819, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.094, optim0_lr0=3.030e-04, train_time=1.694 +[gpub078:0/16] 2024-01-26 05:33:20,907 (trainer:737) INFO: 
11epoch:train:13401-13500batch: iter_time=9.541e-05, forward_time=0.352, loss_ctc=61.486, loss_att=57.644, acc=0.720, loss=58.796, backward_time=0.431, grad_norm=33.415, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.102, optim0_lr0=3.029e-04, train_time=1.753 +[gpub078:0/16] 2024-01-26 05:35:55,690 (trainer:737) INFO: 11epoch:train:13501-13600batch: iter_time=9.726e-05, forward_time=0.290, loss_ctc=57.428, loss_att=56.598, acc=0.700, loss=56.847, backward_time=0.402, grad_norm=31.307, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.028e-04, train_time=1.548 +[gpub078:0/16] 2024-01-26 05:38:55,703 (trainer:737) INFO: 11epoch:train:13601-13700batch: iter_time=9.121e-05, forward_time=0.422, loss_ctc=56.426, loss_att=52.236, acc=0.710, loss=53.493, backward_time=0.451, grad_norm=34.049, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.100, optim0_lr0=3.028e-04, train_time=1.800 +[gpub078:0/16] 2024-01-26 05:40:38,332 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub078:0/16] 2024-01-26 05:40:57,726 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 05:41:01,264 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 05:41:01,265 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub078:0/16] 2024-01-26 05:41:01,301 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 05:47:48,367 (trainer:737) INFO: 11epoch:train:13701-13800batch: iter_time=3.641, forward_time=0.291, loss_ctc=54.890, loss_att=52.731, acc=0.696, loss=53.379, backward_time=0.401, grad_norm=31.493, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.027e-04, train_time=5.327 +[gpub078:0/16] 2024-01-26 05:50:22,508 (trainer:737) INFO: 11epoch:train:13801-13900batch: iter_time=8.476e-05, forward_time=0.386, loss_ctc=58.152, loss_att=59.986, acc=0.690, loss=59.436, backward_time=0.481, grad_norm=32.497, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.099, optim0_lr0=3.026e-04, train_time=1.541 +[gpub078:0/16] 2024-01-26 05:53:10,562 (trainer:737) INFO: 11epoch:train:13901-14000batch: iter_time=8.257e-05, forward_time=0.288, loss_ctc=56.755, loss_att=58.343, acc=0.696, loss=57.867, backward_time=0.399, grad_norm=39.840, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.093, optim0_lr0=3.025e-04, train_time=1.681 +[gpub078:0/16] 2024-01-26 05:56:06,774 (trainer:737) 
INFO: 11epoch:train:14001-14100batch: iter_time=8.452e-05, forward_time=0.397, loss_ctc=60.091, loss_att=54.858, acc=0.700, loss=56.428, backward_time=0.419, grad_norm=31.977, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.119, optim0_lr0=3.024e-04, train_time=1.761 +[gpub078:0/16] 2024-01-26 05:58:22,354 (trainer:737) INFO: 11epoch:train:14101-14200batch: iter_time=8.658e-05, forward_time=0.290, loss_ctc=53.252, loss_att=51.976, acc=0.715, loss=52.359, backward_time=0.404, grad_norm=29.589, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.093, optim0_lr0=3.023e-04, train_time=1.356 +[gpub078:0/16] 2024-01-26 06:01:37,732 (trainer:737) INFO: 11epoch:train:14201-14300batch: iter_time=8.579e-05, forward_time=0.411, loss_ctc=52.903, loss_att=53.059, acc=0.718, loss=53.012, backward_time=0.427, grad_norm=27.506, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.118, optim0_lr0=3.022e-04, train_time=1.953 +[gpub078:0/16] 2024-01-26 06:04:10,568 (trainer:737) INFO: 11epoch:train:14301-14400batch: iter_time=9.395e-05, forward_time=0.289, loss_ctc=51.863, loss_att=51.707, acc=0.715, loss=51.754, backward_time=0.402, grad_norm=31.232, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.095, optim0_lr0=3.021e-04, train_time=1.528 +[gpub078:0/16] 2024-01-26 06:07:03,368 (trainer:737) INFO: 11epoch:train:14401-14500batch: iter_time=9.590e-05, forward_time=0.369, loss_ctc=48.776, loss_att=49.622, acc=0.709, loss=49.368, backward_time=0.471, grad_norm=28.401, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.100, optim0_lr0=3.020e-04, train_time=1.729 +[gpub078:0/16] 2024-01-26 06:09:53,274 (trainer:737) INFO: 11epoch:train:14501-14600batch: iter_time=8.917e-05, forward_time=0.351, loss_ctc=60.410, loss_att=62.151, acc=0.710, loss=61.629, backward_time=0.512, grad_norm=32.128, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.102, optim0_lr0=3.019e-04, train_time=1.698 +[gpub078:0/16] 2024-01-26 06:12:28,519 (trainer:737) INFO: 
11epoch:train:14601-14700batch: iter_time=9.019e-05, forward_time=0.292, loss_ctc=58.820, loss_att=50.543, acc=0.731, loss=53.026, backward_time=0.409, grad_norm=32.236, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.093, optim0_lr0=3.018e-04, train_time=1.553 +[gpub078:0/16] 2024-01-26 06:15:08,951 (trainer:737) INFO: 11epoch:train:14701-14800batch: iter_time=9.367e-05, forward_time=0.288, loss_ctc=57.923, loss_att=57.201, acc=0.703, loss=57.418, backward_time=0.401, grad_norm=33.675, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.106, optim0_lr0=3.017e-04, train_time=1.599 +[gpub078:0/16] 2024-01-26 06:17:58,782 (trainer:737) INFO: 11epoch:train:14801-14900batch: iter_time=9.469e-05, forward_time=0.406, loss_ctc=58.451, loss_att=53.947, acc=0.712, loss=55.298, backward_time=0.439, grad_norm=31.856, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.095, optim0_lr0=3.016e-04, train_time=1.703 +[gpub078:0/16] 2024-01-26 06:20:35,043 (trainer:737) INFO: 11epoch:train:14901-15000batch: iter_time=8.957e-05, forward_time=0.289, loss_ctc=52.117, loss_att=53.757, acc=0.681, loss=53.265, backward_time=0.413, grad_norm=33.013, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.098, optim0_lr0=3.016e-04, train_time=1.562 +[gpub078:0/16] 2024-01-26 06:59:04,165 (trainer:343) INFO: 11epoch results: [train] iter_time=0.350, forward_time=0.349, loss_ctc=57.415, loss_att=56.047, acc=0.706, loss=56.457, backward_time=0.426, grad_norm=31.927, clip=100.000, loss_scale=6.305e+28, optim_step_time=0.099, optim0_lr0=3.087e-04, train_time=2.064, time=8 hours, 36 minutes and 33.3 seconds, total_count=195000, gpu_max_cached_mem_GB=42.420, [valid] loss_ctc=48.419, cer_ctc=0.247, loss_att=46.736, acc=0.624, cer=0.373, wer=0.997, loss=47.241, time=38 minutes and 2.32 seconds, total_count=60723, gpu_max_cached_mem_GB=42.420 +[gpub078:0/16] 2024-01-26 06:59:14,438 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub078:0/16] 2024-01-26 
06:59:14,508 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/6epoch.pth +[gpub078:0/16] 2024-01-26 06:59:14,508 (trainer:272) INFO: 12/45epoch started. Estimated time to finish: 1 week, 6 days and 3 hours +[gpub078:0/16] 2024-01-26 06:59:14,517 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub078:0/16] 2024-01-26 06:59:32,708 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 06:59:36,245 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 06:59:36,245 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub078:0/16] 2024-01-26 06:59:36,249 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 07:07:12,416 (trainer:737) INFO: 12epoch:train:1-100batch: iter_time=3.204, forward_time=0.299, loss_ctc=53.331, loss_att=61.368, acc=0.708, loss=58.957, backward_time=0.413, grad_norm=28.168, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.095, optim0_lr0=3.015e-04, train_time=4.779 +[gpub078:0/16] 2024-01-26 07:09:45,820 (trainer:737) INFO: 12epoch:train:101-200batch: iter_time=8.544e-05, forward_time=0.329, loss_ctc=55.144, loss_att=51.336, acc=0.723, loss=52.478, backward_time=0.409, grad_norm=31.208, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.094, optim0_lr0=3.014e-04, 
train_time=1.534 +[gpub078:0/16] 2024-01-26 07:12:32,802 (trainer:737) INFO: 12epoch:train:201-300batch: iter_time=9.399e-05, forward_time=0.331, loss_ctc=51.746, loss_att=48.490, acc=0.715, loss=49.467, backward_time=0.401, grad_norm=30.767, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.094, optim0_lr0=3.013e-04, train_time=1.669 +[gpub078:0/16] 2024-01-26 07:14:56,754 (trainer:737) INFO: 12epoch:train:301-400batch: iter_time=1.733e-04, forward_time=0.298, loss_ctc=67.569, loss_att=52.145, acc=0.728, loss=56.772, backward_time=0.426, grad_norm=37.904, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.098, optim0_lr0=3.012e-04, train_time=1.440 +[gpub078:0/16] 2024-01-26 07:17:27,377 (trainer:737) INFO: 12epoch:train:401-500batch: iter_time=9.153e-05, forward_time=0.313, loss_ctc=55.223, loss_att=54.649, acc=0.688, loss=54.821, backward_time=0.404, grad_norm=34.450, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.095, optim0_lr0=3.011e-04, train_time=1.506 +[gpub078:0/16] 2024-01-26 07:19:54,470 (trainer:737) INFO: 12epoch:train:501-600batch: iter_time=8.570e-05, forward_time=0.317, loss_ctc=59.771, loss_att=57.725, acc=0.701, loss=58.339, backward_time=0.404, grad_norm=34.496, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.093, optim0_lr0=3.010e-04, train_time=1.471 +[gpub078:0/16] 2024-01-26 07:22:26,476 (trainer:737) INFO: 12epoch:train:601-700batch: iter_time=9.706e-05, forward_time=0.324, loss_ctc=62.374, loss_att=57.657, acc=0.711, loss=59.072, backward_time=0.409, grad_norm=32.825, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.093, optim0_lr0=3.009e-04, train_time=1.520 +[gpub078:0/16] 2024-01-26 07:25:12,140 (trainer:737) INFO: 12epoch:train:701-800batch: iter_time=9.615e-05, forward_time=0.328, loss_ctc=58.983, loss_att=55.575, acc=0.714, loss=56.597, backward_time=0.438, grad_norm=30.274, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.107, optim0_lr0=3.008e-04, train_time=1.657 +[gpub078:0/16] 2024-01-26 
07:27:38,914 (trainer:737) INFO: 12epoch:train:801-900batch: iter_time=1.020e-04, forward_time=0.313, loss_ctc=58.673, loss_att=54.602, acc=0.722, loss=55.823, backward_time=0.409, grad_norm=31.548, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.095, optim0_lr0=3.007e-04, train_time=1.468 +[gpub078:0/16] 2024-01-26 07:30:13,652 (trainer:737) INFO: 12epoch:train:901-1000batch: iter_time=1.052e-04, forward_time=0.307, loss_ctc=66.442, loss_att=62.626, acc=0.702, loss=63.771, backward_time=0.410, grad_norm=38.467, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.093, optim0_lr0=3.006e-04, train_time=1.547 +[gpub078:0/16] 2024-01-26 07:32:55,792 (trainer:737) INFO: 12epoch:train:1001-1100batch: iter_time=1.082e-04, forward_time=0.340, loss_ctc=51.416, loss_att=52.654, acc=0.719, loss=52.283, backward_time=0.413, grad_norm=30.217, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.094, optim0_lr0=3.006e-04, train_time=1.620 +[gpub078:0/16] 2024-01-26 07:35:23,846 (trainer:737) INFO: 12epoch:train:1101-1200batch: iter_time=0.001, forward_time=0.315, loss_ctc=69.585, loss_att=55.490, acc=0.737, loss=59.719, backward_time=0.446, grad_norm=37.330, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.103, optim0_lr0=3.005e-04, train_time=1.481 +[gpub078:0/16] 2024-01-26 07:37:14,162 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub078:0/16] 2024-01-26 07:37:33,386 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 07:37:37,044 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 07:37:37,044 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub078:0/16] 2024-01-26 07:37:37,047 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 07:44:10,499 (trainer:737) INFO: 12epoch:train:1201-1300batch: iter_time=3.536, forward_time=0.333, loss_ctc=52.175, loss_att=53.217, acc=0.716, loss=52.905, backward_time=0.402, grad_norm=28.810, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=3.004e-04, train_time=5.266 +[gpub078:0/16] 2024-01-26 07:46:44,568 (trainer:737) INFO: 12epoch:train:1301-1400batch: iter_time=8.194e-05, forward_time=0.322, loss_ctc=52.014, loss_att=51.849, acc=0.722, loss=51.899, backward_time=0.418, grad_norm=29.443, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.097, optim0_lr0=3.003e-04, train_time=1.540 +[gpub078:0/16] 2024-01-26 07:49:18,770 (trainer:737) INFO: 12epoch:train:1401-1500batch: iter_time=8.174e-05, forward_time=0.332, loss_ctc=53.890, loss_att=50.906, acc=0.710, loss=51.801, backward_time=0.431, grad_norm=30.600, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.100, optim0_lr0=3.002e-04, train_time=1.542 +[gpub078:0/16] 2024-01-26 07:51:58,601 (trainer:737) INFO: 
12epoch:train:1501-1600batch: iter_time=8.795e-05, forward_time=0.294, loss_ctc=60.646, loss_att=56.766, acc=0.709, loss=57.930, backward_time=0.401, grad_norm=33.051, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=3.001e-04, train_time=1.598 +[gpub078:0/16] 2024-01-26 07:54:32,513 (trainer:737) INFO: 12epoch:train:1601-1700batch: iter_time=8.716e-05, forward_time=0.318, loss_ctc=61.928, loss_att=49.732, acc=0.699, loss=53.391, backward_time=0.411, grad_norm=38.976, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.095, optim0_lr0=3.000e-04, train_time=1.539 +[gpub078:0/16] 2024-01-26 07:57:06,316 (trainer:737) INFO: 12epoch:train:1701-1800batch: iter_time=8.892e-05, forward_time=0.345, loss_ctc=48.180, loss_att=51.707, acc=0.693, loss=50.649, backward_time=0.414, grad_norm=35.255, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.094, optim0_lr0=2.999e-04, train_time=1.538 +[gpub078:0/16] 2024-01-26 07:59:54,796 (trainer:737) INFO: 12epoch:train:1801-1900batch: iter_time=9.202e-05, forward_time=0.326, loss_ctc=59.552, loss_att=56.837, acc=0.712, loss=57.651, backward_time=0.435, grad_norm=33.585, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.098, optim0_lr0=2.998e-04, train_time=1.684 +[gpub078:0/16] 2024-01-26 08:02:21,821 (trainer:737) INFO: 12epoch:train:1901-2000batch: iter_time=9.336e-05, forward_time=0.338, loss_ctc=67.114, loss_att=58.869, acc=0.701, loss=61.342, backward_time=0.409, grad_norm=33.932, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=2.997e-04, train_time=1.471 +[gpub078:0/16] 2024-01-26 08:04:50,977 (trainer:737) INFO: 12epoch:train:2001-2100batch: iter_time=8.860e-05, forward_time=0.289, loss_ctc=53.564, loss_att=54.566, acc=0.709, loss=54.266, backward_time=0.401, grad_norm=29.240, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=2.997e-04, train_time=1.491 +[gpub078:0/16] 2024-01-26 08:07:24,441 (trainer:737) INFO: 12epoch:train:2101-2200batch: 
iter_time=9.242e-05, forward_time=0.332, loss_ctc=68.483, loss_att=57.937, acc=0.709, loss=61.100, backward_time=0.433, grad_norm=42.573, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.095, optim0_lr0=2.996e-04, train_time=1.534 +[gpub078:0/16] 2024-01-26 08:10:11,706 (trainer:737) INFO: 12epoch:train:2201-2300batch: iter_time=8.536e-05, forward_time=0.335, loss_ctc=51.721, loss_att=56.900, acc=0.696, loss=55.347, backward_time=0.443, grad_norm=32.307, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.101, optim0_lr0=2.995e-04, train_time=1.672 +[gpub078:0/16] 2024-01-26 08:12:29,917 (trainer:737) INFO: 12epoch:train:2301-2400batch: iter_time=8.626e-05, forward_time=0.293, loss_ctc=54.703, loss_att=44.278, acc=0.733, loss=47.405, backward_time=0.401, grad_norm=29.755, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=2.994e-04, train_time=1.382 +[gpub078:0/16] 2024-01-26 08:15:00,368 (trainer:737) INFO: 12epoch:train:2401-2500batch: iter_time=9.040e-05, forward_time=0.288, loss_ctc=62.992, loss_att=53.488, acc=0.724, loss=56.339, backward_time=0.399, grad_norm=39.986, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=2.993e-04, train_time=1.504 +[gpub078:0/16] 2024-01-26 08:15:20,396 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub078:0/16] 2024-01-26 08:15:39,597 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 08:15:43,342 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 08:15:43,342 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub078:0/16] 2024-01-26 08:15:43,347 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 08:24:02,327 (trainer:737) INFO: 12epoch:train:2501-2600batch: iter_time=3.635, forward_time=0.335, loss_ctc=52.104, loss_att=60.390, acc=0.714, loss=57.904, backward_time=0.413, grad_norm=28.822, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.095, optim0_lr0=2.992e-04, train_time=5.419 +[gpub078:0/16] 2024-01-26 08:27:11,468 (trainer:737) INFO: 12epoch:train:2601-2700batch: iter_time=9.007e-05, forward_time=0.344, loss_ctc=53.457, loss_att=50.422, acc=0.729, loss=51.332, backward_time=0.404, grad_norm=29.572, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.094, optim0_lr0=2.991e-04, train_time=1.891 +[gpub078:0/16] 2024-01-26 08:30:27,032 (trainer:737) INFO: 12epoch:train:2701-2800batch: iter_time=8.530e-05, forward_time=0.287, loss_ctc=51.157, loss_att=47.934, acc=0.721, loss=48.901, backward_time=0.396, grad_norm=30.145, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=2.990e-04, train_time=1.956 +[gpub078:0/16] 2024-01-26 08:33:35,434 (trainer:737) INFO: 
12epoch:train:2801-2900batch: iter_time=8.674e-05, forward_time=0.327, loss_ctc=63.967, loss_att=51.493, acc=0.731, loss=55.235, backward_time=0.447, grad_norm=35.531, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.100, optim0_lr0=2.989e-04, train_time=1.884 +[gpub078:0/16] 2024-01-26 08:36:38,590 (trainer:737) INFO: 12epoch:train:2901-3000batch: iter_time=8.864e-05, forward_time=0.292, loss_ctc=53.944, loss_att=53.675, acc=0.694, loss=53.756, backward_time=0.405, grad_norm=33.176, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.093, optim0_lr0=2.989e-04, train_time=1.831 +[gpub078:0/16] 2024-01-26 08:39:55,595 (trainer:737) INFO: 12epoch:train:3001-3100batch: iter_time=8.864e-05, forward_time=0.323, loss_ctc=57.814, loss_att=55.814, acc=0.710, loss=56.414, backward_time=0.406, grad_norm=42.540, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.988e-04, train_time=1.969 +[gpub078:0/16] 2024-01-26 08:43:01,442 (trainer:737) INFO: 12epoch:train:3101-3200batch: iter_time=9.445e-05, forward_time=0.391, loss_ctc=59.001, loss_att=56.546, acc=0.718, loss=57.282, backward_time=0.430, grad_norm=31.786, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.107, optim0_lr0=2.987e-04, train_time=1.859 +[gpub078:0/16] 2024-01-26 08:46:02,044 (trainer:737) INFO: 12epoch:train:3201-3300batch: iter_time=9.027e-05, forward_time=0.290, loss_ctc=57.871, loss_att=55.121, acc=0.717, loss=55.946, backward_time=0.403, grad_norm=30.027, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.986e-04, train_time=1.806 +[gpub078:0/16] 2024-01-26 08:49:10,620 (trainer:737) INFO: 12epoch:train:3301-3400batch: iter_time=8.732e-05, forward_time=0.295, loss_ctc=56.818, loss_att=53.598, acc=0.726, loss=54.564, backward_time=0.404, grad_norm=30.741, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.985e-04, train_time=1.886 +[gpub078:0/16] 2024-01-26 08:51:41,431 (trainer:737) INFO: 12epoch:train:3401-3500batch: 
iter_time=8.495e-05, forward_time=0.330, loss_ctc=63.421, loss_att=62.215, acc=0.707, loss=62.577, backward_time=0.404, grad_norm=42.191, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.094, optim0_lr0=2.984e-04, train_time=1.507 +[gpub078:0/16] 2024-01-26 08:55:02,493 (trainer:737) INFO: 12epoch:train:3501-3600batch: iter_time=7.896e-04, forward_time=0.362, loss_ctc=50.447, loss_att=52.044, acc=0.723, loss=51.565, backward_time=0.438, grad_norm=28.875, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.098, optim0_lr0=2.983e-04, train_time=2.011 +[gpub078:0/16] 2024-01-26 08:57:52,879 (trainer:737) INFO: 12epoch:train:3601-3700batch: iter_time=8.106e-05, forward_time=0.290, loss_ctc=66.681, loss_att=53.440, acc=0.740, loss=57.412, backward_time=0.402, grad_norm=38.346, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.092, optim0_lr0=2.982e-04, train_time=1.704 +[gpub078:0/16] 2024-01-26 08:59:20,950 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub078:0/16] 2024-01-26 08:59:40,157 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 08:59:44,078 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 08:59:44,078 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub078:0/16] 2024-01-26 08:59:44,081 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 09:06:34,108 
(trainer:737) INFO: 12epoch:train:3701-3800batch: iter_time=3.738, forward_time=0.344, loss_ctc=51.665, loss_att=50.962, acc=0.728, loss=51.172, backward_time=0.406, grad_norm=28.314, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.981e-04, train_time=5.212 +[gpub078:0/16] 2024-01-26 09:09:47,645 (trainer:737) INFO: 12epoch:train:3801-3900batch: iter_time=8.370e-05, forward_time=0.347, loss_ctc=51.359, loss_att=53.431, acc=0.723, loss=52.809, backward_time=0.457, grad_norm=29.814, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.102, optim0_lr0=2.981e-04, train_time=1.935 +[gpub078:0/16] 2024-01-26 09:12:09,677 (trainer:737) INFO: 12epoch:train:3901-4000batch: iter_time=7.762e-05, forward_time=0.290, loss_ctc=52.863, loss_att=49.864, acc=0.722, loss=50.763, backward_time=0.402, grad_norm=30.121, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.980e-04, train_time=1.420 +[gpub078:0/16] 2024-01-26 09:14:58,047 (trainer:737) INFO: 12epoch:train:4001-4100batch: iter_time=8.426e-05, forward_time=0.297, loss_ctc=60.532, loss_att=55.234, acc=0.724, loss=56.824, backward_time=0.407, grad_norm=33.958, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.095, optim0_lr0=2.979e-04, train_time=1.683 +[gpub078:0/16] 2024-01-26 09:17:42,913 (trainer:737) INFO: 12epoch:train:4101-4200batch: iter_time=8.001e-05, forward_time=0.334, loss_ctc=60.553, loss_att=48.083, acc=0.718, loss=51.824, backward_time=0.401, grad_norm=37.653, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.094, optim0_lr0=2.978e-04, train_time=1.648 +[gpub078:0/16] 2024-01-26 09:20:19,581 (trainer:737) INFO: 12epoch:train:4201-4300batch: iter_time=8.478e-05, forward_time=0.405, loss_ctc=47.207, loss_att=50.945, acc=0.701, loss=49.824, backward_time=0.420, grad_norm=28.948, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.096, optim0_lr0=2.977e-04, train_time=1.566 +[gpub078:0/16] 2024-01-26 09:23:00,380 (trainer:737) INFO: 
12epoch:train:4301-4400batch: iter_time=9.102e-05, forward_time=0.291, loss_ctc=58.193, loss_att=55.906, acc=0.723, loss=56.592, backward_time=0.401, grad_norm=31.519, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.976e-04, train_time=1.608 +[gpub078:0/16] 2024-01-26 09:25:51,402 (trainer:737) INFO: 12epoch:train:4401-4500batch: iter_time=8.673e-05, forward_time=0.344, loss_ctc=66.091, loss_att=57.195, acc=0.720, loss=59.864, backward_time=0.415, grad_norm=35.351, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.094, optim0_lr0=2.975e-04, train_time=1.710 +[gpub078:0/16] 2024-01-26 09:28:29,343 (trainer:737) INFO: 12epoch:train:4501-4600batch: iter_time=8.755e-05, forward_time=0.359, loss_ctc=53.445, loss_att=54.831, acc=0.722, loss=54.415, backward_time=0.427, grad_norm=29.155, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.105, optim0_lr0=2.974e-04, train_time=1.579 +[gpub078:0/16] 2024-01-26 09:31:09,434 (trainer:737) INFO: 12epoch:train:4601-4700batch: iter_time=2.753e-04, forward_time=0.299, loss_ctc=65.011, loss_att=57.179, acc=0.715, loss=59.529, backward_time=0.402, grad_norm=40.094, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.973e-04, train_time=1.600 +[gpub078:0/16] 2024-01-26 09:33:56,906 (trainer:737) INFO: 12epoch:train:4701-4800batch: iter_time=9.264e-05, forward_time=0.298, loss_ctc=51.113, loss_att=56.685, acc=0.713, loss=55.014, backward_time=0.411, grad_norm=30.816, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.094, optim0_lr0=2.973e-04, train_time=1.676 +[gpub078:0/16] 2024-01-26 09:36:23,013 (trainer:737) INFO: 12epoch:train:4801-4900batch: iter_time=9.800e-05, forward_time=0.315, loss_ctc=54.174, loss_att=44.172, acc=0.742, loss=47.173, backward_time=0.414, grad_norm=30.436, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.093, optim0_lr0=2.972e-04, train_time=1.461 +[gpub078:0/16] 2024-01-26 09:38:50,168 (trainer:737) INFO: 12epoch:train:4901-5000batch: 
iter_time=1.480e-04, forward_time=0.306, loss_ctc=60.832, loss_att=50.799, acc=0.741, loss=53.809, backward_time=0.433, grad_norm=37.057, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.100, optim0_lr0=2.971e-04, train_time=1.471 +[gpub078:0/16] 2024-01-26 09:39:10,197 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub078:0/16] 2024-01-26 09:39:29,247 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 09:39:32,918 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 09:39:32,918 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub078:0/16] 2024-01-26 09:39:32,933 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 09:48:03,082 (trainer:737) INFO: 12epoch:train:5001-5100batch: iter_time=3.723, forward_time=0.326, loss_ctc=51.572, loss_att=60.335, acc=0.713, loss=57.706, backward_time=0.406, grad_norm=29.829, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.094, optim0_lr0=2.970e-04, train_time=5.529 +[gpub078:0/16] 2024-01-26 09:50:48,142 (trainer:737) INFO: 12epoch:train:5101-5200batch: iter_time=8.810e-05, forward_time=0.334, loss_ctc=52.868, loss_att=48.984, acc=0.728, loss=50.149, backward_time=0.412, grad_norm=29.699, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.094, optim0_lr0=2.969e-04, train_time=1.650 +[gpub078:0/16] 2024-01-26 09:53:25,089 
(trainer:737) INFO: 12epoch:train:5201-5300batch: iter_time=9.242e-05, forward_time=0.288, loss_ctc=51.069, loss_att=48.134, acc=0.714, loss=49.015, backward_time=0.400, grad_norm=28.612, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.968e-04, train_time=1.569 +[gpub078:0/16] 2024-01-26 09:56:17,347 (trainer:737) INFO: 12epoch:train:5301-5400batch: iter_time=1.059e-04, forward_time=0.350, loss_ctc=62.377, loss_att=51.039, acc=0.728, loss=54.440, backward_time=0.470, grad_norm=34.854, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.101, optim0_lr0=2.967e-04, train_time=1.722 +[gpub078:0/16] 2024-01-26 09:58:51,356 (trainer:737) INFO: 12epoch:train:5401-5500batch: iter_time=8.940e-05, forward_time=0.288, loss_ctc=53.540, loss_att=53.571, acc=0.682, loss=53.562, backward_time=0.400, grad_norm=33.609, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.967e-04, train_time=1.540 +[gpub078:0/16] 2024-01-26 10:01:27,005 (trainer:737) INFO: 12epoch:train:5501-5600batch: iter_time=9.129e-05, forward_time=0.359, loss_ctc=56.786, loss_att=56.184, acc=0.702, loss=56.365, backward_time=0.410, grad_norm=33.364, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.094, optim0_lr0=2.966e-04, train_time=1.556 +[gpub078:0/16] 2024-01-26 10:04:19,584 (trainer:737) INFO: 12epoch:train:5601-5700batch: iter_time=9.263e-05, forward_time=0.316, loss_ctc=58.503, loss_att=55.815, acc=0.715, loss=56.621, backward_time=0.409, grad_norm=32.673, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.095, optim0_lr0=2.965e-04, train_time=1.726 +[gpub078:0/16] 2024-01-26 10:06:54,587 (trainer:737) INFO: 12epoch:train:5701-5800batch: iter_time=9.703e-05, forward_time=0.328, loss_ctc=56.814, loss_att=53.968, acc=0.708, loss=54.822, backward_time=0.436, grad_norm=31.983, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.098, optim0_lr0=2.964e-04, train_time=1.550 +[gpub078:0/16] 2024-01-26 10:09:12,295 (trainer:737) INFO: 
12epoch:train:5801-5900batch: iter_time=8.748e-05, forward_time=0.302, loss_ctc=55.654, loss_att=53.288, acc=0.719, loss=53.998, backward_time=0.408, grad_norm=32.377, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.963e-04, train_time=1.377 +[gpub078:0/16] 2024-01-26 10:12:14,651 (trainer:737) INFO: 12epoch:train:5901-6000batch: iter_time=9.573e-05, forward_time=0.318, loss_ctc=59.662, loss_att=57.743, acc=0.703, loss=58.319, backward_time=0.410, grad_norm=46.076, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.962e-04, train_time=1.824 +[gpub078:0/16] 2024-01-26 10:14:43,031 (trainer:737) INFO: 12epoch:train:6001-6100batch: iter_time=5.799e-04, forward_time=0.372, loss_ctc=49.517, loss_att=50.676, acc=0.719, loss=50.328, backward_time=0.448, grad_norm=52.637, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.110, optim0_lr0=2.961e-04, train_time=1.482 +[gpub078:0/16] 2024-01-26 10:17:27,679 (trainer:737) INFO: 12epoch:train:6101-6200batch: iter_time=1.025e-04, forward_time=0.291, loss_ctc=66.589, loss_att=54.770, acc=0.737, loss=58.316, backward_time=0.403, grad_norm=38.511, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.960e-04, train_time=1.648 +[gpub078:0/16] 2024-01-26 10:18:55,808 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub078:0/16] 2024-01-26 10:19:14,944 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 10:19:18,550 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 10:19:18,550 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub078:0/16] 2024-01-26 10:19:18,582 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 10:25:42,540 (trainer:737) INFO: 12epoch:train:6201-6300batch: iter_time=3.350, forward_time=0.333, loss_ctc=51.215, loss_att=52.107, acc=0.718, loss=51.839, backward_time=0.410, grad_norm=29.553, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.960e-04, train_time=4.949 +[gpub078:0/16] 2024-01-26 10:28:12,984 (trainer:737) INFO: 12epoch:train:6301-6400batch: iter_time=8.324e-05, forward_time=0.292, loss_ctc=51.031, loss_att=53.162, acc=0.727, loss=52.522, backward_time=0.406, grad_norm=30.052, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.959e-04, train_time=1.504 +[gpub078:0/16] 2024-01-26 10:31:01,923 (trainer:737) INFO: 12epoch:train:6401-6500batch: iter_time=8.110e-05, forward_time=0.335, loss_ctc=52.869, loss_att=50.188, acc=0.724, loss=50.992, backward_time=0.467, grad_norm=29.024, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.100, optim0_lr0=2.958e-04, train_time=1.688 +[gpub078:0/16] 2024-01-26 10:34:03,997 (trainer:737) INFO: 
12epoch:train:6501-6600batch: iter_time=9.642e-05, forward_time=0.312, loss_ctc=60.650, loss_att=55.563, acc=0.725, loss=57.089, backward_time=0.411, grad_norm=35.610, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.095, optim0_lr0=2.957e-04, train_time=1.821 +[gpub078:0/16] 2024-01-26 10:36:19,246 (trainer:737) INFO: 12epoch:train:6601-6700batch: iter_time=9.753e-05, forward_time=0.313, loss_ctc=59.260, loss_att=47.966, acc=0.718, loss=51.354, backward_time=0.405, grad_norm=36.313, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.956e-04, train_time=1.352 +[gpub078:0/16] 2024-01-26 10:39:02,392 (trainer:737) INFO: 12epoch:train:6701-6800batch: iter_time=9.421e-05, forward_time=0.349, loss_ctc=46.493, loss_att=51.190, acc=0.703, loss=49.781, backward_time=0.446, grad_norm=29.567, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.103, optim0_lr0=2.955e-04, train_time=1.631 +[gpub078:0/16] 2024-01-26 10:42:07,288 (trainer:737) INFO: 12epoch:train:6801-6900batch: iter_time=9.424e-05, forward_time=0.296, loss_ctc=58.445, loss_att=56.434, acc=0.725, loss=57.037, backward_time=0.408, grad_norm=31.681, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.093, optim0_lr0=2.954e-04, train_time=1.847 +[gpub078:0/16] 2024-01-26 10:44:41,722 (trainer:737) INFO: 12epoch:train:6901-7000batch: iter_time=1.094e-04, forward_time=0.339, loss_ctc=65.676, loss_att=57.218, acc=0.721, loss=59.755, backward_time=0.407, grad_norm=36.273, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.095, optim0_lr0=2.954e-04, train_time=1.545 +[gpub078:0/16] 2024-01-26 10:47:13,701 (trainer:737) INFO: 12epoch:train:7001-7100batch: iter_time=1.135e-04, forward_time=0.295, loss_ctc=52.947, loss_att=54.890, acc=0.722, loss=54.307, backward_time=0.429, grad_norm=29.560, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.095, optim0_lr0=2.953e-04, train_time=1.521 +[gpub078:0/16] 2024-01-26 10:50:02,386 (trainer:737) INFO: 12epoch:train:7101-7200batch: 
iter_time=3.471e-04, forward_time=0.326, loss_ctc=63.773, loss_att=56.373, acc=0.721, loss=58.593, backward_time=0.428, grad_norm=43.023, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.096, optim0_lr0=2.952e-04, train_time=1.687 +[gpub078:0/16] 2024-01-26 10:52:45,256 (trainer:737) INFO: 12epoch:train:7201-7300batch: iter_time=9.825e-05, forward_time=0.330, loss_ctc=50.489, loss_att=56.559, acc=0.713, loss=54.738, backward_time=0.412, grad_norm=30.536, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.094, optim0_lr0=2.951e-04, train_time=1.627 +[gpub078:0/16] 2024-01-26 10:55:03,872 (trainer:737) INFO: 12epoch:train:7301-7400batch: iter_time=9.421e-05, forward_time=0.289, loss_ctc=53.630, loss_att=43.898, acc=0.743, loss=46.817, backward_time=0.402, grad_norm=30.678, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.093, optim0_lr0=2.950e-04, train_time=1.387 +[gpub078:0/16] 2024-01-26 10:57:53,625 (trainer:737) INFO: 12epoch:train:7401-7500batch: iter_time=8.586e-05, forward_time=0.366, loss_ctc=60.308, loss_att=51.266, acc=0.741, loss=53.978, backward_time=0.431, grad_norm=35.625, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.100, optim0_lr0=2.949e-04, train_time=1.697 +[gpub078:0/16] 2024-01-26 10:58:13,666 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub078:0/16] 2024-01-26 10:58:32,733 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 10:58:36,348 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 10:58:36,348 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub078:0/16] 2024-01-26 10:58:36,352 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 11:07:03,011 (trainer:737) INFO: 12epoch:train:7501-7600batch: iter_time=3.540, forward_time=0.331, loss_ctc=51.261, loss_att=59.570, acc=0.712, loss=57.077, backward_time=0.405, grad_norm=29.533, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.094, optim0_lr0=2.948e-04, train_time=5.494 +[gpub078:0/16] 2024-01-26 11:10:01,166 (trainer:737) INFO: 12epoch:train:7601-7700batch: iter_time=9.282e-05, forward_time=0.288, loss_ctc=51.956, loss_att=48.383, acc=0.729, loss=49.455, backward_time=0.399, grad_norm=29.155, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.093, optim0_lr0=2.948e-04, train_time=1.781 +[gpub078:0/16] 2024-01-26 11:12:53,344 (trainer:737) INFO: 12epoch:train:7701-7800batch: iter_time=8.331e-04, forward_time=0.372, loss_ctc=50.074, loss_att=47.217, acc=0.719, loss=48.074, backward_time=0.425, grad_norm=28.071, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.098, optim0_lr0=2.947e-04, train_time=1.722 +[gpub078:0/16] 2024-01-26 11:15:55,898 (trainer:737) INFO: 
12epoch:train:7801-7900batch: iter_time=8.495e-05, forward_time=0.290, loss_ctc=61.538, loss_att=49.756, acc=0.733, loss=53.291, backward_time=0.403, grad_norm=34.681, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.093, optim0_lr0=2.946e-04, train_time=1.825 +[gpub078:0/16] 2024-01-26 11:18:48,072 (trainer:737) INFO: 12epoch:train:7901-8000batch: iter_time=8.532e-05, forward_time=0.331, loss_ctc=53.485, loss_att=53.206, acc=0.685, loss=53.290, backward_time=0.406, grad_norm=36.083, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.094, optim0_lr0=2.945e-04, train_time=1.721 +[gpub078:0/16] 2024-01-26 11:21:50,015 (trainer:737) INFO: 12epoch:train:8001-8100batch: iter_time=7.961e-05, forward_time=0.289, loss_ctc=56.453, loss_att=55.813, acc=0.704, loss=56.005, backward_time=0.402, grad_norm=35.625, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.095, optim0_lr0=2.944e-04, train_time=1.820 +[gpub078:0/16] 2024-01-26 11:24:37,976 (trainer:737) INFO: 12epoch:train:8101-8200batch: iter_time=8.381e-05, forward_time=0.342, loss_ctc=58.678, loss_att=55.900, acc=0.714, loss=56.734, backward_time=0.455, grad_norm=34.044, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.100, optim0_lr0=2.943e-04, train_time=1.679 +[gpub078:0/16] 2024-01-26 11:27:52,862 (trainer:737) INFO: 12epoch:train:8201-8300batch: iter_time=1.097e-04, forward_time=0.361, loss_ctc=56.595, loss_att=53.555, acc=0.709, loss=54.467, backward_time=0.409, grad_norm=30.321, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.095, optim0_lr0=2.942e-04, train_time=1.949 +[gpub078:0/16] 2024-01-26 11:30:43,656 (trainer:737) INFO: 12epoch:train:8301-8400batch: iter_time=8.766e-05, forward_time=0.289, loss_ctc=55.239, loss_att=52.726, acc=0.721, loss=53.480, backward_time=0.399, grad_norm=30.355, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.093, optim0_lr0=2.942e-04, train_time=1.707 +[gpub078:0/16] 2024-01-26 11:33:57,117 (trainer:737) INFO: 12epoch:train:8401-8500batch: 
iter_time=8.962e-05, forward_time=0.358, loss_ctc=59.327, loss_att=59.053, acc=0.702, loss=59.135, backward_time=0.454, grad_norm=41.825, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.105, optim0_lr0=2.941e-04, train_time=1.935 +[gpub078:0/16] 2024-01-26 11:36:51,792 (trainer:737) INFO: 12epoch:train:8501-8600batch: iter_time=9.830e-05, forward_time=0.330, loss_ctc=48.908, loss_att=50.434, acc=0.718, loss=49.976, backward_time=0.417, grad_norm=27.865, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.095, optim0_lr0=2.940e-04, train_time=1.746 +[gpub078:0/16] 2024-01-26 11:39:54,422 (trainer:737) INFO: 12epoch:train:8601-8700batch: iter_time=9.456e-05, forward_time=0.290, loss_ctc=64.639, loss_att=52.131, acc=0.739, loss=55.884, backward_time=0.401, grad_norm=38.545, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.093, optim0_lr0=2.939e-04, train_time=1.826 +[gpub078:0/16] 2024-01-26 11:41:30,535 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub078:0/16] 2024-01-26 11:41:50,094 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 11:41:53,682 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 11:41:53,682 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub078:0/16] 2024-01-26 11:41:53,686 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 11:48:25,273 
(trainer:737) INFO: 12epoch:train:8701-8800batch: iter_time=3.563, forward_time=0.345, loss_ctc=50.323, loss_att=51.178, acc=0.720, loss=50.921, backward_time=0.416, grad_norm=28.623, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.095, optim0_lr0=2.938e-04, train_time=5.108 +[gpub078:0/16] 2024-01-26 11:51:24,462 (trainer:737) INFO: 12epoch:train:8801-8900batch: iter_time=8.528e-05, forward_time=0.343, loss_ctc=50.273, loss_att=53.050, acc=0.728, loss=52.217, backward_time=0.405, grad_norm=28.061, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.096, optim0_lr0=2.937e-04, train_time=1.792 +[gpub078:0/16] 2024-01-26 11:53:58,219 (trainer:737) INFO: 12epoch:train:8901-9000batch: iter_time=9.348e-05, forward_time=0.290, loss_ctc=52.389, loss_att=49.879, acc=0.725, loss=50.632, backward_time=0.402, grad_norm=58.615, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.093, optim0_lr0=2.937e-04, train_time=1.537 +[gpub078:0/16] 2024-01-26 11:57:03,623 (trainer:737) INFO: 12epoch:train:9001-9100batch: iter_time=5.523e-04, forward_time=0.351, loss_ctc=59.104, loss_att=54.599, acc=0.727, loss=55.950, backward_time=0.429, grad_norm=33.255, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.106, optim0_lr0=2.936e-04, train_time=1.854 +[gpub078:0/16] 2024-01-26 11:59:49,201 (trainer:737) INFO: 12epoch:train:9101-9200batch: iter_time=8.338e-05, forward_time=0.324, loss_ctc=58.477, loss_att=47.467, acc=0.722, loss=50.770, backward_time=0.410, grad_norm=34.726, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.096, optim0_lr0=2.935e-04, train_time=1.655 +[gpub078:0/16] 2024-01-26 12:02:30,375 (trainer:737) INFO: 12epoch:train:9201-9300batch: iter_time=8.411e-05, forward_time=0.288, loss_ctc=46.148, loss_att=50.934, acc=0.706, loss=49.499, backward_time=0.401, grad_norm=29.209, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.934e-04, train_time=1.611 +[gpub078:0/16] 2024-01-26 12:05:33,468 (trainer:737) INFO: 
12epoch:train:9301-9400batch: iter_time=8.274e-05, forward_time=0.350, loss_ctc=57.769, loss_att=55.770, acc=0.725, loss=56.370, backward_time=0.473, grad_norm=31.585, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.108, optim0_lr0=2.933e-04, train_time=1.831 +[gpub078:0/16] 2024-01-26 12:08:29,319 (trainer:737) INFO: 12epoch:train:9401-9500batch: iter_time=8.514e-05, forward_time=0.315, loss_ctc=64.899, loss_att=56.548, acc=0.722, loss=59.053, backward_time=0.423, grad_norm=33.839, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.094, optim0_lr0=2.932e-04, train_time=1.758 +[gpub078:0/16] 2024-01-26 12:10:54,630 (trainer:737) INFO: 12epoch:train:9501-9600batch: iter_time=8.995e-05, forward_time=0.291, loss_ctc=51.747, loss_att=54.048, acc=0.726, loss=53.358, backward_time=0.404, grad_norm=28.927, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.094, optim0_lr0=2.931e-04, train_time=1.452 +[gpub078:0/16] 2024-01-26 12:13:58,004 (trainer:737) INFO: 12epoch:train:9601-9700batch: iter_time=9.067e-05, forward_time=0.369, loss_ctc=63.440, loss_att=57.662, acc=0.722, loss=59.395, backward_time=0.422, grad_norm=45.497, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.103, optim0_lr0=2.931e-04, train_time=1.834 +[gpub078:0/16] 2024-01-26 12:16:31,624 (trainer:737) INFO: 12epoch:train:9701-9800batch: iter_time=8.855e-05, forward_time=0.352, loss_ctc=49.732, loss_att=55.963, acc=0.715, loss=54.094, backward_time=0.406, grad_norm=29.961, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.094, optim0_lr0=2.930e-04, train_time=1.536 +[gpub078:0/16] 2024-01-26 12:19:32,971 (trainer:737) INFO: 12epoch:train:9801-9900batch: iter_time=8.635e-05, forward_time=0.288, loss_ctc=53.002, loss_att=43.555, acc=0.745, loss=46.389, backward_time=0.399, grad_norm=31.873, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.929e-04, train_time=1.813 +[gpub078:0/16] 2024-01-26 12:22:26,250 (trainer:737) INFO: 12epoch:train:9901-10000batch: 
iter_time=8.298e-05, forward_time=0.404, loss_ctc=61.168, loss_att=50.784, acc=0.746, loss=53.899, backward_time=0.447, grad_norm=34.900, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.114, optim0_lr0=2.928e-04, train_time=1.733 +[gpub078:0/16] 2024-01-26 12:22:46,401 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub078:0/16] 2024-01-26 12:23:05,379 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 12:23:09,313 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 12:23:09,314 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub078:0/16] 2024-01-26 12:23:09,317 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 12:31:55,689 (trainer:737) INFO: 12epoch:train:10001-10100batch: iter_time=4.099, forward_time=0.290, loss_ctc=50.812, loss_att=59.527, acc=0.712, loss=56.913, backward_time=0.403, grad_norm=30.802, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.927e-04, train_time=5.694 +[gpub078:0/16] 2024-01-26 12:34:58,376 (trainer:737) INFO: 12epoch:train:10101-10200batch: iter_time=8.516e-05, forward_time=0.288, loss_ctc=51.796, loss_att=48.110, acc=0.729, loss=49.216, backward_time=0.400, grad_norm=30.337, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.926e-04, train_time=1.827 +[gpub078:0/16] 2024-01-26 12:37:29,345 
(trainer:737) INFO: 12epoch:train:10201-10300batch: iter_time=7.859e-05, forward_time=0.354, loss_ctc=49.847, loss_att=47.090, acc=0.719, loss=47.917, backward_time=0.442, grad_norm=30.793, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.105, optim0_lr0=2.926e-04, train_time=1.509 +[gpub078:0/16] 2024-01-26 12:40:44,455 (trainer:737) INFO: 12epoch:train:10301-10400batch: iter_time=9.236e-05, forward_time=0.329, loss_ctc=60.760, loss_att=49.584, acc=0.734, loss=52.937, backward_time=0.408, grad_norm=35.351, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.094, optim0_lr0=2.925e-04, train_time=1.951 +[gpub078:0/16] 2024-01-26 12:43:08,276 (trainer:737) INFO: 12epoch:train:10401-10500batch: iter_time=8.505e-05, forward_time=0.290, loss_ctc=53.414, loss_att=52.763, acc=0.687, loss=52.958, backward_time=0.401, grad_norm=36.153, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.924e-04, train_time=1.438 +[gpub078:0/16] 2024-01-26 12:46:00,479 (trainer:737) INFO: 12epoch:train:10501-10600batch: iter_time=8.742e-05, forward_time=0.289, loss_ctc=56.640, loss_att=55.532, acc=0.706, loss=55.865, backward_time=0.400, grad_norm=33.976, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.923e-04, train_time=1.722 +[gpub078:0/16] 2024-01-26 12:48:54,479 (trainer:737) INFO: 12epoch:train:10601-10700batch: iter_time=8.416e-05, forward_time=0.386, loss_ctc=58.903, loss_att=55.258, acc=0.716, loss=56.351, backward_time=0.472, grad_norm=33.426, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.107, optim0_lr0=2.922e-04, train_time=1.739 +[gpub078:0/16] 2024-01-26 12:51:53,029 (trainer:737) INFO: 12epoch:train:10701-10800batch: iter_time=8.086e-05, forward_time=0.288, loss_ctc=56.136, loss_att=53.320, acc=0.711, loss=54.165, backward_time=0.399, grad_norm=31.357, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.921e-04, train_time=1.786 +[gpub078:0/16] 2024-01-26 12:54:10,855 (trainer:737) INFO: 
12epoch:train:10801-10900batch: iter_time=8.162e-05, forward_time=0.291, loss_ctc=54.971, loss_att=52.660, acc=0.723, loss=53.354, backward_time=0.402, grad_norm=33.035, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.093, optim0_lr0=2.921e-04, train_time=1.378 +[gpub078:0/16] 2024-01-26 12:57:16,845 (trainer:737) INFO: 12epoch:train:10901-11000batch: iter_time=8.605e-05, forward_time=0.452, loss_ctc=58.246, loss_att=58.362, acc=0.703, loss=58.327, backward_time=0.425, grad_norm=45.544, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.099, optim0_lr0=2.920e-04, train_time=1.859 +[gpub078:0/16] 2024-01-26 13:00:17,494 (trainer:737) INFO: 12epoch:train:11001-11100batch: iter_time=8.798e-05, forward_time=0.288, loss_ctc=48.687, loss_att=49.926, acc=0.719, loss=49.554, backward_time=0.399, grad_norm=28.333, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.919e-04, train_time=1.806 +[gpub078:0/16] 2024-01-26 13:02:54,354 (trainer:737) INFO: 12epoch:train:11101-11200batch: iter_time=8.180e-05, forward_time=0.289, loss_ctc=65.758, loss_att=54.181, acc=0.741, loss=57.654, backward_time=0.402, grad_norm=41.728, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.918e-04, train_time=1.569 +[gpub078:0/16] 2024-01-26 13:04:25,161 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub078:0/16] 2024-01-26 13:04:44,845 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 13:04:48,603 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 13:04:48,603 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub078:0/16] 2024-01-26 13:04:48,629 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 13:11:51,780 (trainer:737) INFO: 12epoch:train:11201-11300batch: iter_time=3.825, forward_time=0.397, loss_ctc=50.753, loss_att=49.602, acc=0.720, loss=49.948, backward_time=0.417, grad_norm=29.809, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.096, optim0_lr0=2.917e-04, train_time=5.374 +[gpub078:0/16] 2024-01-26 13:14:32,328 (trainer:737) INFO: 12epoch:train:11301-11400batch: iter_time=8.722e-05, forward_time=0.288, loss_ctc=50.430, loss_att=50.334, acc=0.732, loss=50.363, backward_time=0.402, grad_norm=28.546, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.916e-04, train_time=1.605 +[gpub078:0/16] 2024-01-26 13:17:16,595 (trainer:737) INFO: 12epoch:train:11401-11500batch: iter_time=8.121e-05, forward_time=0.289, loss_ctc=52.498, loss_att=49.468, acc=0.720, loss=50.377, backward_time=0.400, grad_norm=29.495, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.916e-04, train_time=1.643 +[gpub078:0/16] 2024-01-26 13:20:02,382 (trainer:737) 
INFO: 12epoch:train:11501-11600batch: iter_time=7.931e-05, forward_time=0.418, loss_ctc=59.560, loss_att=54.959, acc=0.720, loss=56.340, backward_time=0.436, grad_norm=34.875, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.118, optim0_lr0=2.915e-04, train_time=1.658 +[gpub078:0/16] 2024-01-26 13:23:06,807 (trainer:737) INFO: 12epoch:train:11601-11700batch: iter_time=9.757e-05, forward_time=0.288, loss_ctc=57.787, loss_att=47.620, acc=0.710, loss=50.670, backward_time=0.398, grad_norm=34.726, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.914e-04, train_time=1.844 +[gpub078:0/16] 2024-01-26 13:25:29,638 (trainer:737) INFO: 12epoch:train:11701-11800batch: iter_time=8.064e-05, forward_time=0.287, loss_ctc=46.028, loss_att=49.632, acc=0.703, loss=48.550, backward_time=0.398, grad_norm=29.634, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.913e-04, train_time=1.427 +[gpub078:0/16] 2024-01-26 13:28:11,468 (trainer:737) INFO: 12epoch:train:11801-11900batch: iter_time=9.301e-05, forward_time=0.371, loss_ctc=57.689, loss_att=55.303, acc=0.723, loss=56.018, backward_time=0.432, grad_norm=32.539, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.105, optim0_lr0=2.912e-04, train_time=1.619 +[gpub078:0/16] 2024-01-26 13:30:52,958 (trainer:737) INFO: 12epoch:train:11901-12000batch: iter_time=9.173e-05, forward_time=0.308, loss_ctc=65.161, loss_att=57.173, acc=0.711, loss=59.569, backward_time=0.406, grad_norm=33.799, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.096, optim0_lr0=2.912e-04, train_time=1.614 +[gpub078:0/16] 2024-01-26 13:33:48,338 (trainer:737) INFO: 12epoch:train:12001-12100batch: iter_time=8.988e-05, forward_time=0.290, loss_ctc=51.784, loss_att=53.177, acc=0.718, loss=52.759, backward_time=0.400, grad_norm=45.289, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.911e-04, train_time=1.754 +[gpub078:0/16] 2024-01-26 13:36:13,328 (trainer:737) INFO: 
12epoch:train:12101-12200batch: iter_time=8.956e-05, forward_time=0.289, loss_ctc=63.259, loss_att=56.030, acc=0.719, loss=58.199, backward_time=0.402, grad_norm=42.092, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.910e-04, train_time=1.448 +[gpub078:0/16] 2024-01-26 13:39:22,878 (trainer:737) INFO: 12epoch:train:12201-12300batch: iter_time=8.939e-05, forward_time=0.399, loss_ctc=49.741, loss_att=55.131, acc=0.705, loss=53.514, backward_time=0.478, grad_norm=31.556, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.102, optim0_lr0=2.909e-04, train_time=1.897 +[gpub078:0/16] 2024-01-26 13:42:19,641 (trainer:737) INFO: 12epoch:train:12301-12400batch: iter_time=7.951e-05, forward_time=0.291, loss_ctc=52.435, loss_att=42.608, acc=0.741, loss=45.556, backward_time=0.400, grad_norm=29.466, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.094, optim0_lr0=2.908e-04, train_time=1.767 +[gpub078:0/16] 2024-01-26 13:45:14,038 (trainer:737) INFO: 12epoch:train:12401-12500batch: iter_time=8.073e-05, forward_time=0.289, loss_ctc=59.903, loss_att=50.878, acc=0.733, loss=53.585, backward_time=0.399, grad_norm=36.609, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.094, optim0_lr0=2.907e-04, train_time=1.744 +[gpub078:0/16] 2024-01-26 13:45:34,066 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub078:0/16] 2024-01-26 13:45:53,360 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 13:45:57,033 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 13:45:57,033 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub078:0/16] 2024-01-26 13:45:57,036 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 13:54:15,606 (trainer:737) INFO: 12epoch:train:12501-12600batch: iter_time=3.679, forward_time=0.395, loss_ctc=51.019, loss_att=60.090, acc=0.720, loss=57.369, backward_time=0.429, grad_norm=517.471, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.097, optim0_lr0=2.907e-04, train_time=5.415 +[gpub078:0/16] 2024-01-26 13:57:43,184 (trainer:737) INFO: 12epoch:train:12601-12700batch: iter_time=8.701e-05, forward_time=0.288, loss_ctc=51.643, loss_att=49.611, acc=0.736, loss=50.221, backward_time=0.400, grad_norm=28.229, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.906e-04, train_time=2.076 +[gpub078:0/16] 2024-01-26 14:00:27,421 (trainer:737) INFO: 12epoch:train:12701-12800batch: iter_time=7.555e-05, forward_time=0.287, loss_ctc=50.157, loss_att=47.065, acc=0.728, loss=47.993, backward_time=0.400, grad_norm=28.894, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.905e-04, train_time=1.642 +[gpub078:0/16] 2024-01-26 14:04:15,551 (trainer:737) 
INFO: 12epoch:train:12801-12900batch: iter_time=9.134e-05, forward_time=0.436, loss_ctc=60.141, loss_att=49.424, acc=0.740, loss=52.639, backward_time=0.431, grad_norm=31.986, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.101, optim0_lr0=2.904e-04, train_time=2.281 +[gpub078:0/16] 2024-01-26 14:07:04,085 (trainer:737) INFO: 12epoch:train:12901-13000batch: iter_time=7.885e-05, forward_time=0.289, loss_ctc=52.806, loss_att=52.563, acc=0.704, loss=52.636, backward_time=0.400, grad_norm=32.403, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.093, optim0_lr0=2.903e-04, train_time=1.685 +[gpub078:0/16] 2024-01-26 14:10:06,840 (trainer:737) INFO: 12epoch:train:13001-13100batch: iter_time=9.349e-05, forward_time=0.290, loss_ctc=56.150, loss_att=55.643, acc=0.714, loss=55.795, backward_time=0.403, grad_norm=34.166, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.903e-04, train_time=1.827 +[gpub078:0/16] 2024-01-26 14:14:07,631 (trainer:737) INFO: 12epoch:train:13101-13200batch: iter_time=9.477e-05, forward_time=0.428, loss_ctc=57.583, loss_att=55.284, acc=0.723, loss=55.974, backward_time=0.500, grad_norm=32.869, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.103, optim0_lr0=2.902e-04, train_time=2.407 +[gpub078:0/16] 2024-01-26 14:17:20,577 (trainer:737) INFO: 12epoch:train:13201-13300batch: iter_time=1.019e-04, forward_time=0.290, loss_ctc=55.635, loss_att=53.220, acc=0.726, loss=53.945, backward_time=0.401, grad_norm=30.519, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.094, optim0_lr0=2.901e-04, train_time=1.930 +[gpub078:0/16] 2024-01-26 14:20:11,697 (trainer:737) INFO: 12epoch:train:13301-13400batch: iter_time=9.275e-05, forward_time=0.290, loss_ctc=54.293, loss_att=52.120, acc=0.735, loss=52.772, backward_time=0.403, grad_norm=28.917, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.900e-04, train_time=1.711 +[gpub078:0/16] 2024-01-26 14:23:55,706 (trainer:737) INFO: 
12epoch:train:13401-13500batch: iter_time=8.997e-05, forward_time=0.411, loss_ctc=58.583, loss_att=59.303, acc=0.712, loss=59.087, backward_time=0.486, grad_norm=39.159, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.096, optim0_lr0=2.899e-04, train_time=2.239 +[gpub078:0/16] 2024-01-26 14:26:51,613 (trainer:737) INFO: 12epoch:train:13501-13600batch: iter_time=9.400e-05, forward_time=0.289, loss_ctc=48.463, loss_att=50.526, acc=0.731, loss=49.907, backward_time=0.400, grad_norm=27.832, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.898e-04, train_time=1.759 +[gpub078:0/16] 2024-01-26 14:30:32,904 (trainer:737) INFO: 12epoch:train:13601-13700batch: iter_time=9.253e-05, forward_time=0.289, loss_ctc=64.096, loss_att=51.397, acc=0.747, loss=55.206, backward_time=0.401, grad_norm=38.758, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.898e-04, train_time=2.213 +[gpub078:0/16] 2024-01-26 14:32:17,501 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub078:0/16] 2024-01-26 14:32:37,325 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 14:32:41,013 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 14:32:41,013 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub078:0/16] 2024-01-26 14:32:41,016 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 14:39:40,032 (trainer:737) INFO: 12epoch:train:13701-13800batch: iter_time=3.742, forward_time=0.372, loss_ctc=50.619, loss_att=50.629, acc=0.725, loss=50.626, backward_time=0.414, grad_norm=29.315, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.096, optim0_lr0=2.897e-04, train_time=5.471 +[gpub078:0/16] 2024-01-26 14:42:29,190 (trainer:737) INFO: 12epoch:train:13801-13900batch: iter_time=8.509e-05, forward_time=0.288, loss_ctc=49.996, loss_att=50.631, acc=0.730, loss=50.441, backward_time=0.398, grad_norm=29.281, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.896e-04, train_time=1.692 +[gpub078:0/16] 2024-01-26 14:44:52,673 (trainer:737) INFO: 12epoch:train:13901-14000batch: iter_time=8.569e-05, forward_time=0.289, loss_ctc=52.168, loss_att=50.024, acc=0.720, loss=50.667, backward_time=0.401, grad_norm=29.376, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.895e-04, train_time=1.435 +[gpub078:0/16] 2024-01-26 14:48:08,946 (trainer:737) 
INFO: 12epoch:train:14001-14100batch: iter_time=6.637e-04, forward_time=0.358, loss_ctc=58.119, loss_att=53.458, acc=0.724, loss=54.856, backward_time=0.443, grad_norm=33.676, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.113, optim0_lr0=2.894e-04, train_time=1.963 +[gpub078:0/16] 2024-01-26 14:51:04,715 (trainer:737) INFO: 12epoch:train:14101-14200batch: iter_time=9.371e-05, forward_time=0.285, loss_ctc=56.159, loss_att=46.548, acc=0.713, loss=49.432, backward_time=0.397, grad_norm=32.321, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.094, optim0_lr0=2.894e-04, train_time=1.757 +[gpub078:0/16] 2024-01-26 14:53:42,242 (trainer:737) INFO: 12epoch:train:14201-14300batch: iter_time=9.933e-05, forward_time=0.287, loss_ctc=45.837, loss_att=50.048, acc=0.705, loss=48.785, backward_time=0.399, grad_norm=28.143, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.094, optim0_lr0=2.893e-04, train_time=1.575 +[gpub078:0/16] 2024-01-26 14:56:51,805 (trainer:737) INFO: 12epoch:train:14301-14400batch: iter_time=2.470e-04, forward_time=0.429, loss_ctc=56.727, loss_att=55.549, acc=0.721, loss=55.902, backward_time=0.426, grad_norm=31.616, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.116, optim0_lr0=2.892e-04, train_time=1.895 +[gpub078:0/16] 2024-01-26 14:59:50,693 (trainer:737) INFO: 12epoch:train:14401-14500batch: iter_time=9.321e-05, forward_time=0.290, loss_ctc=64.085, loss_att=56.254, acc=0.712, loss=58.604, backward_time=0.402, grad_norm=34.292, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.891e-04, train_time=1.789 +[gpub078:0/16] 2024-01-26 15:02:24,531 (trainer:737) INFO: 12epoch:train:14501-14600batch: iter_time=8.428e-05, forward_time=0.289, loss_ctc=51.388, loss_att=53.070, acc=0.719, loss=52.566, backward_time=0.402, grad_norm=28.597, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.890e-04, train_time=1.538 +[gpub078:0/16] 2024-01-26 15:05:13,128 (trainer:737) INFO: 
12epoch:train:14601-14700batch: iter_time=8.574e-05, forward_time=0.294, loss_ctc=62.571, loss_att=55.033, acc=0.716, loss=57.295, backward_time=0.410, grad_norm=44.812, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.105, optim0_lr0=2.890e-04, train_time=1.686 +[gpub078:0/16] 2024-01-26 15:08:01,886 (trainer:737) INFO: 12epoch:train:14701-14800batch: iter_time=9.305e-05, forward_time=0.407, loss_ctc=49.174, loss_att=55.072, acc=0.704, loss=53.302, backward_time=0.438, grad_norm=29.330, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.100, optim0_lr0=2.889e-04, train_time=1.687 +[gpub078:0/16] 2024-01-26 15:11:14,608 (trainer:737) INFO: 12epoch:train:14801-14900batch: iter_time=9.187e-05, forward_time=0.288, loss_ctc=52.636, loss_att=42.563, acc=0.742, loss=45.585, backward_time=0.400, grad_norm=31.123, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.888e-04, train_time=1.927 +[gpub078:0/16] 2024-01-26 15:13:57,763 (trainer:737) INFO: 12epoch:train:14901-15000batch: iter_time=9.244e-05, forward_time=0.288, loss_ctc=58.712, loss_att=50.330, acc=0.735, loss=52.845, backward_time=0.398, grad_norm=35.206, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.093, optim0_lr0=2.887e-04, train_time=1.631 +[gpub078:0/16] 2024-01-26 15:53:38,776 (trainer:343) INFO: 12epoch results: [train] iter_time=0.291, forward_time=0.323, loss_ctc=56.114, loss_att=53.090, acc=0.719, loss=53.997, backward_time=0.415, grad_norm=36.631, clip=100.000, loss_scale=1.075e+31, optim_step_time=0.096, optim0_lr0=2.950e-04, train_time=1.979, time=8 hours, 15 minutes and 10.02 seconds, total_count=210000, gpu_max_cached_mem_GB=42.420, [valid] loss_ctc=49.367, cer_ctc=0.243, loss_att=49.346, acc=0.625, cer=0.375, wer=0.997, loss=49.352, time=39 minutes and 14.04 seconds, total_count=65394, gpu_max_cached_mem_GB=42.420 +[gpub078:0/16] 2024-01-26 15:53:49,168 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub078:0/16] 2024-01-26 
15:53:49,237 (trainer:445) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/7epoch.pth +[gpub078:0/16] 2024-01-26 15:53:49,237 (trainer:272) INFO: 13/45epoch started. Estimated time to finish: 1 week, 5 days and 16 hours +[gpub078:0/16] 2024-01-26 15:53:49,256 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub078:0/16] 2024-01-26 15:54:07,557 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub078:0/16] 2024-01-26 15:54:11,010 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub078:0/16] 2024-01-26 15:54:11,010 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub078:0/16] 2024-01-26 15:54:11,013 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub078:0/16] 2024-01-26 16:02:03,494 (trainer:737) INFO: 13epoch:train:1-100batch: iter_time=3.405, forward_time=0.293, loss_ctc=66.003, loss_att=62.670, acc=0.690, loss=63.670, backward_time=0.415, grad_norm=38.380, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.096, optim0_lr0=2.886e-04, train_time=4.942 +[gpub078:0/16] 2024-01-26 16:04:18,969 (trainer:737) INFO: 13epoch:train:101-200batch: iter_time=8.850e-05, forward_time=0.289, loss_ctc=51.104, loss_att=54.558, acc=0.704, loss=53.522, backward_time=0.407, grad_norm=31.184, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.093, optim0_lr0=2.886e-04, 
train_time=1.355 +[gpub078:0/16] 2024-01-26 16:07:02,665 (trainer:737) INFO: 13epoch:train:201-300batch: iter_time=1.018e-04, forward_time=0.348, loss_ctc=69.088, loss_att=61.634, acc=0.704, loss=63.870, backward_time=0.424, grad_norm=40.764, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.098, optim0_lr0=2.885e-04, train_time=1.637 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2891733.0 ON gpub078 CANCELLED AT 2024-01-26T16:09:25 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.14.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.14.log new file mode 100644 index 0000000000000000000000000000000000000000..05d9245d751810a37e5ee5caa051f410d00c1ca5 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.14.log @@ -0,0 +1,1979 @@ +# Running on gpuc01.delta.ncsa.illinois.edu +# Started at Tue Jan 23 14:54:20 CST 2024 +# SLURMD_NODENAME=gpuc01 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=128 +# SLURM_CPUS_PER_TASK=128 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=8 +# SLURM_GTIDS=0 +# SLURM_JOBID=2886934 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='128(x2)' +# SLURM_JOB_END_TIME=1706216051 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3,4,5,6,7 +# SLURM_JOB_ID=2886934 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpuc[01-02]' +# SLURM_JOB_NUM_NODES=2 +# SLURM_JOB_PARTITION=gpuA100x8 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1706043251 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=2000000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=2 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpuc[01-02]' 
+# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x2)' +# SLURM_TASK_PID=65608 +# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpuc01 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type 
dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 8 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_bba9bc3d-96cd-4319-939a-daad2e59a370 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type 
exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 8 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_bba9bc3d-96cd-4319-939a-daad2e59a370 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type 
exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 8 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_bba9bc3d-96cd-4319-939a-daad2e59a370 +[gpuc01:0/16] 2024-01-23 14:54:34,411 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpuc01:0/16] 2024-01-23 14:54:34,885 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. 
+[gpuc01:0/16] 2024-01-23 14:54:34,918 (s2t:464) INFO: Vocabulary size: 50002 +[gpuc01:0/16] 2024-01-23 14:54:39,305 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpuc01:0/16] 2024-01-23 14:54:39,311 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), 
eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, 
elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + 
(conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): 
Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, 
inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + 
(norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, 
out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): 
PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + 
(linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpuc01:0/16] 2024-01-23 14:54:39,311 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpuc01:0/16] 2024-01-23 14:54:39,311 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpuc01:0/16] 2024-01-23 14:54:39,313 (abs_task:1245) INFO: Saving the configuration in 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpuc01:0/16] 2024-01-23 14:54:45,088 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 14:54:46,403 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 14:54:46,403 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=9342, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpuc01:0/16] 2024-01-23 14:54:46,405 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=9342, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 14:54:49,193 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpuc01:65673:65673 [0] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65673:65673 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65673:65673 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpuc01:0/16] 2024-01-23 14:54:52,960 (trainer:284) INFO: 6/45epoch started +[gpuc01:0/16] 2024-01-23 14:54:53,011 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpuc01:0/16] 2024-01-23 14:55:13,407 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 14:55:17,334 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 14:55:17,334 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpuc01:0/16] 2024-01-23 14:55:17,341 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +gpuc02:64034:64034 [3] NCCL INFO cudaDriverVersion 12020 +gpuc02:64034:64034 [3] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64034:64034 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64034:64100 [3] NCCL INFO NET/IB : No device found. 
+gpuc02:64034:64100 [3] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64034:64100 [3] NCCL INFO Using network Socket +gpuc02:64034:64100 [3] NCCL INFO Setting affinity for GPU 3 to ffff0000 +gpuc02:64034:64100 [3] NCCL INFO Trees [0] 12/-1/-1->11->10 [1] 12/-1/-1->11->10 +gpuc02:64034:64100 [3] NCCL INFO Channel 00/0 : 11[4c000] -> 12[88000] via P2P/IPC/read +gpuc02:64034:64100 [3] NCCL INFO Channel 01/0 : 11[4c000] -> 12[88000] via P2P/IPC/read +gpuc02:64034:64100 [3] NCCL INFO Connected all rings +gpuc02:64034:64100 [3] NCCL INFO Channel 00/0 : 11[4c000] -> 10[48000] via P2P/IPC/read +gpuc02:64034:64100 [3] NCCL INFO Channel 01/0 : 11[4c000] -> 10[48000] via P2P/IPC/read +gpuc02:64034:64100 [3] NCCL INFO Connected all trees +gpuc02:64034:64100 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64034:64100 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64034:64100 [3] NCCL INFO comm 0x1986bdd0 rank 11 nranks 16 cudaDev 3 busId 4c000 - Init COMPLETE +gpuc02:64032:64032 [1] NCCL INFO cudaDriverVersion 12020 +gpuc02:64032:64032 [1] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64032:64032 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64032:64101 [1] NCCL INFO NET/IB : No device found. 
+gpuc02:64032:64101 [1] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64032:64101 [1] NCCL INFO Using network Socket +gpuc02:64032:64101 [1] NCCL INFO Setting affinity for GPU 1 to ffff0000,00000000 +gpuc02:64032:64101 [1] NCCL INFO Trees [0] 10/-1/-1->9->8 [1] 10/-1/-1->9->8 +gpuc02:64032:64101 [1] NCCL INFO Channel 00/0 : 9[b000] -> 10[48000] via P2P/IPC/read +gpuc02:64032:64101 [1] NCCL INFO Channel 01/0 : 9[b000] -> 10[48000] via P2P/IPC/read +gpuc02:64032:64101 [1] NCCL INFO Connected all rings +gpuc02:64032:64101 [1] NCCL INFO Channel 00/0 : 9[b000] -> 8[7000] via P2P/IPC/read +gpuc02:64032:64101 [1] NCCL INFO Channel 01/0 : 9[b000] -> 8[7000] via P2P/IPC/read +gpuc02:64032:64101 [1] NCCL INFO Connected all trees +gpuc02:64032:64101 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64032:64101 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64032:64101 [1] NCCL INFO comm 0x16499180 rank 9 nranks 16 cudaDev 1 busId b000 - Init COMPLETE +gpuc02:64038:64038 [7] NCCL INFO cudaDriverVersion 12020 +gpuc02:64038:64038 [7] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64038:64038 [7] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64038:64097 [7] NCCL INFO NET/IB : No device found. 
+gpuc02:64038:64097 [7] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64038:64097 [7] NCCL INFO Using network Socket +gpuc02:64038:64097 [7] NCCL INFO Setting affinity for GPU 7 to ffff0000,00000000,00000000 +gpuc02:64038:64097 [7] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpuc02:64038:64097 [7] NCCL INFO Channel 00/0 : 15[cb000] -> 0[7000] [send] via NET/Socket/1 +gpuc02:64038:64097 [7] NCCL INFO Channel 01/0 : 15[cb000] -> 0[7000] [send] via NET/Socket/1 +gpuc02:64038:64097 [7] NCCL INFO Connected all rings +gpuc02:64038:64097 [7] NCCL INFO Channel 00/0 : 15[cb000] -> 14[c8000] via P2P/IPC/read +gpuc02:64038:64097 [7] NCCL INFO Channel 01/0 : 15[cb000] -> 14[c8000] via P2P/IPC/read +gpuc02:64038:64097 [7] NCCL INFO Connected all trees +gpuc02:64038:64097 [7] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64038:64097 [7] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64038:64097 [7] NCCL INFO comm 0x7533b180 rank 15 nranks 16 cudaDev 7 busId cb000 - Init COMPLETE +gpuc02:64036:64036 [5] NCCL INFO cudaDriverVersion 12020 +gpuc02:64036:64036 [5] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64036:64036 [5] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64036:64102 [5] NCCL INFO NET/IB : No device found. 
+gpuc02:64036:64102 [5] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64036:64102 [5] NCCL INFO Using network Socket +gpuc02:64036:64102 [5] NCCL INFO Setting affinity for GPU 5 to ffff0000,00000000,00000000,00000000 +gpuc02:64036:64102 [5] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpuc02:64036:64102 [5] NCCL INFO Channel 00/0 : 13[8b000] -> 14[c8000] via P2P/IPC/read +gpuc02:64036:64102 [5] NCCL INFO Channel 01/0 : 13[8b000] -> 14[c8000] via P2P/IPC/read +gpuc02:64036:64102 [5] NCCL INFO Connected all rings +gpuc02:64036:64102 [5] NCCL INFO Channel 00/0 : 13[8b000] -> 12[88000] via P2P/IPC/read +gpuc02:64036:64102 [5] NCCL INFO Channel 01/0 : 13[8b000] -> 12[88000] via P2P/IPC/read +gpuc02:64036:64102 [5] NCCL INFO Connected all trees +gpuc02:64036:64102 [5] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64036:64102 [5] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64036:64102 [5] NCCL INFO comm 0x146eabe0 rank 13 nranks 16 cudaDev 5 busId 8b000 - Init COMPLETE +gpuc02:64035:64035 [4] NCCL INFO cudaDriverVersion 12020 +gpuc02:64035:64035 [4] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64035:64035 [4] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64035:64098 [4] NCCL INFO NET/IB : No device found. 
+gpuc02:64035:64098 [4] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64035:64098 [4] NCCL INFO Using network Socket +gpuc02:64035:64098 [4] NCCL INFO Setting affinity for GPU 4 to ffff0000,00000000,00000000,00000000 +gpuc02:64035:64098 [4] NCCL INFO Trees [0] 13/-1/-1->12->11 [1] 13/-1/-1->12->11 +gpuc02:64035:64098 [4] NCCL INFO Channel 00/0 : 12[88000] -> 13[8b000] via P2P/IPC/read +gpuc02:64035:64098 [4] NCCL INFO Channel 01/0 : 12[88000] -> 13[8b000] via P2P/IPC/read +gpuc02:64035:64098 [4] NCCL INFO Connected all rings +gpuc02:64035:64098 [4] NCCL INFO Channel 00/0 : 12[88000] -> 11[4c000] via P2P/IPC/read +gpuc02:64035:64098 [4] NCCL INFO Channel 01/0 : 12[88000] -> 11[4c000] via P2P/IPC/read +gpuc02:64035:64098 [4] NCCL INFO Connected all trees +gpuc02:64035:64098 [4] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64035:64098 [4] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64035:64098 [4] NCCL INFO comm 0x7841b240 rank 12 nranks 16 cudaDev 4 busId 88000 - Init COMPLETE +gpuc01:65675:65675 [2] NCCL INFO cudaDriverVersion 12020 +gpuc01:65675:65675 [2] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65675:65675 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65675:65743 [2] NCCL INFO NET/IB : No device found. 
+gpuc01:65675:65743 [2] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65675:65743 [2] NCCL INFO Using network Socket +gpuc01:65675:65743 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpuc01:65675:65743 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpuc01:65675:65743 [2] NCCL INFO Channel 00/0 : 2[48000] -> 3[4c000] via P2P/IPC/read +gpuc01:65675:65743 [2] NCCL INFO Channel 01/0 : 2[48000] -> 3[4c000] via P2P/IPC/read +gpuc01:65675:65743 [2] NCCL INFO Connected all rings +gpuc01:65675:65743 [2] NCCL INFO Channel 00/0 : 2[48000] -> 1[b000] via P2P/IPC/read +gpuc01:65675:65743 [2] NCCL INFO Channel 01/0 : 2[48000] -> 1[b000] via P2P/IPC/read +gpuc01:65675:65743 [2] NCCL INFO Connected all trees +gpuc01:65675:65743 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65675:65743 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65675:65743 [2] NCCL INFO comm 0x607c5180 rank 2 nranks 16 cudaDev 2 busId 48000 - Init COMPLETE +gpuc02:64031:64031 [0] NCCL INFO cudaDriverVersion 12020 +gpuc02:64031:64031 [0] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64031:64031 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64031:64103 [0] NCCL INFO NET/IB : No device found. 
+gpuc02:64031:64103 [0] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64031:64103 [0] NCCL INFO Using network Socket +gpuc02:64031:64103 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpuc02:64031:64103 [0] NCCL INFO Trees [0] 9/-1/-1->8->0 [1] 9/0/-1->8->-1 +gpuc02:64031:64103 [0] NCCL INFO Channel 00/0 : 7[cb000] -> 8[7000] [receive] via NET/Socket/1 +gpuc02:64031:64103 [0] NCCL INFO Channel 01/0 : 7[cb000] -> 8[7000] [receive] via NET/Socket/1 +gpuc02:64031:64103 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[b000] via P2P/IPC/read +gpuc02:64031:64103 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[b000] via P2P/IPC/read +gpuc02:64031:64103 [0] NCCL INFO Connected all rings +gpuc02:64031:64103 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/Socket/1 +gpuc02:64031:64103 [0] NCCL INFO Channel 01/0 : 0[7000] -> 8[7000] [receive] via NET/Socket/1 +gpuc02:64031:64103 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/Socket/1 +gpuc02:64031:64103 [0] NCCL INFO Channel 01/0 : 8[7000] -> 0[7000] [send] via NET/Socket/1 +gpuc02:64031:64103 [0] NCCL INFO Connected all trees +gpuc02:64031:64103 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64031:64103 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64031:64103 [0] NCCL INFO comm 0x78f18190 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpuc01:65680:65680 [7] NCCL INFO cudaDriverVersion 12020 +gpuc01:65680:65680 [7] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65680:65680 [7] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65680:65744 [7] NCCL INFO NET/IB : No device found. 
+gpuc01:65680:65744 [7] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65680:65744 [7] NCCL INFO Using network Socket +gpuc01:65680:65744 [7] NCCL INFO Setting affinity for GPU 7 to ffff0000,00000000,00000000 +gpuc01:65680:65744 [7] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpuc01:65680:65744 [7] NCCL INFO Channel 00/0 : 7[cb000] -> 8[7000] [send] via NET/Socket/1 +gpuc01:65680:65744 [7] NCCL INFO Channel 01/0 : 7[cb000] -> 8[7000] [send] via NET/Socket/1 +gpuc01:65680:65744 [7] NCCL INFO Connected all rings +gpuc01:65680:65744 [7] NCCL INFO Channel 00/0 : 7[cb000] -> 6[c8000] via P2P/IPC/read +gpuc01:65680:65744 [7] NCCL INFO Channel 01/0 : 7[cb000] -> 6[c8000] via P2P/IPC/read +gpuc01:65680:65744 [7] NCCL INFO Connected all trees +gpuc01:65680:65744 [7] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65680:65744 [7] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65680:65744 [7] NCCL INFO comm 0x13fe39b0 rank 7 nranks 16 cudaDev 7 busId cb000 - Init COMPLETE +gpuc01:65674:65674 [1] NCCL INFO cudaDriverVersion 12020 +gpuc01:65674:65674 [1] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65674:65674 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65674:65747 [1] NCCL INFO NET/IB : No device found. 
+gpuc01:65674:65747 [1] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65674:65747 [1] NCCL INFO Using network Socket +gpuc01:65674:65747 [1] NCCL INFO Setting affinity for GPU 1 to ffff0000,00000000 +gpuc01:65674:65747 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpuc01:65674:65747 [1] NCCL INFO Channel 00/0 : 1[b000] -> 2[48000] via P2P/IPC/read +gpuc01:65674:65747 [1] NCCL INFO Channel 01/0 : 1[b000] -> 2[48000] via P2P/IPC/read +gpuc01:65674:65747 [1] NCCL INFO Connected all rings +gpuc01:65674:65747 [1] NCCL INFO Channel 00/0 : 1[b000] -> 0[7000] via P2P/IPC/read +gpuc01:65674:65747 [1] NCCL INFO Channel 01/0 : 1[b000] -> 0[7000] via P2P/IPC/read +gpuc01:65674:65747 [1] NCCL INFO Connected all trees +gpuc01:65674:65747 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65674:65747 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65674:65747 [1] NCCL INFO comm 0x178ef6d0 rank 1 nranks 16 cudaDev 1 busId b000 - Init COMPLETE +gpuc02:64037:64037 [6] NCCL INFO cudaDriverVersion 12020 +gpuc02:64037:64037 [6] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64037:64037 [6] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64037:64104 [6] NCCL INFO NET/IB : No device found. 
+gpuc02:64037:64104 [6] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64037:64104 [6] NCCL INFO Using network Socket +gpuc02:64037:64104 [6] NCCL INFO Setting affinity for GPU 6 to ffff0000,00000000,00000000 +gpuc02:64037:64104 [6] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpuc02:64037:64104 [6] NCCL INFO Channel 00/0 : 14[c8000] -> 15[cb000] via P2P/IPC/read +gpuc02:64037:64104 [6] NCCL INFO Channel 01/0 : 14[c8000] -> 15[cb000] via P2P/IPC/read +gpuc02:64037:64104 [6] NCCL INFO Connected all rings +gpuc02:64037:64104 [6] NCCL INFO Channel 00/0 : 14[c8000] -> 13[8b000] via P2P/IPC/read +gpuc02:64037:64104 [6] NCCL INFO Channel 01/0 : 14[c8000] -> 13[8b000] via P2P/IPC/read +gpuc02:64037:64104 [6] NCCL INFO Connected all trees +gpuc02:64037:64104 [6] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64037:64104 [6] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64037:64104 [6] NCCL INFO comm 0x1169bcc0 rank 14 nranks 16 cudaDev 6 busId c8000 - Init COMPLETE +gpuc01:65673:65741 [0] NCCL INFO NET/IB : No device found. 
+gpuc01:65673:65741 [0] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65673:65741 [0] NCCL INFO Using network Socket +gpuc01:65673:65741 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpuc01:65673:65741 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpuc01:65673:65741 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpuc01:65673:65741 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->8 +gpuc01:65673:65741 [0] NCCL INFO Channel 00/0 : 15[cb000] -> 0[7000] [receive] via NET/Socket/1 +gpuc01:65673:65741 [0] NCCL INFO Channel 01/0 : 15[cb000] -> 0[7000] [receive] via NET/Socket/1 +gpuc01:65673:65741 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[b000] via P2P/IPC/read +gpuc01:65673:65741 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[b000] via P2P/IPC/read +gpuc01:65673:65741 [0] NCCL INFO Connected all rings +gpuc01:65673:65741 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/Socket/1 +gpuc01:65673:65741 [0] NCCL INFO Channel 01/0 : 8[7000] -> 0[7000] [receive] via NET/Socket/1 +gpuc01:65673:65741 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/Socket/1 +gpuc01:65673:65741 [0] NCCL INFO Channel 01/0 : 0[7000] -> 8[7000] [send] via NET/Socket/1 +gpuc01:65673:65741 [0] NCCL INFO Connected all trees +gpuc01:65673:65741 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65673:65741 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65673:65741 [0] NCCL INFO comm 0x1bd5cad0 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpuc01:65678:65678 [5] NCCL INFO cudaDriverVersion 12020 +gpuc01:65678:65678 [5] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65678:65678 [5] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65678:65746 [5] NCCL INFO NET/IB : No device found. 
+gpuc01:65678:65746 [5] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65678:65746 [5] NCCL INFO Using network Socket +gpuc01:65678:65746 [5] NCCL INFO Setting affinity for GPU 5 to ffff0000,00000000,00000000,00000000 +gpuc01:65678:65746 [5] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/-1/-1->5->4 +gpuc01:65678:65746 [5] NCCL INFO Channel 00/0 : 5[8b000] -> 6[c8000] via P2P/IPC/read +gpuc01:65678:65746 [5] NCCL INFO Channel 01/0 : 5[8b000] -> 6[c8000] via P2P/IPC/read +gpuc01:65678:65746 [5] NCCL INFO Connected all rings +gpuc01:65678:65746 [5] NCCL INFO Channel 00/0 : 5[8b000] -> 4[88000] via P2P/IPC/read +gpuc01:65678:65746 [5] NCCL INFO Channel 01/0 : 5[8b000] -> 4[88000] via P2P/IPC/read +gpuc01:65678:65746 [5] NCCL INFO Connected all trees +gpuc01:65678:65746 [5] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65678:65746 [5] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65678:65746 [5] NCCL INFO comm 0x5dbeb010 rank 5 nranks 16 cudaDev 5 busId 8b000 - Init COMPLETE +gpuc01:65679:65679 [6] NCCL INFO cudaDriverVersion 12020 +gpuc01:65679:65679 [6] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65679:65679 [6] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65679:65742 [6] NCCL INFO NET/IB : No device found. 
+gpuc01:65679:65742 [6] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65679:65742 [6] NCCL INFO Using network Socket +gpuc01:65679:65742 [6] NCCL INFO Setting affinity for GPU 6 to ffff0000,00000000,00000000 +gpuc01:65679:65742 [6] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpuc01:65679:65742 [6] NCCL INFO Channel 00/0 : 6[c8000] -> 7[cb000] via P2P/IPC/read +gpuc01:65679:65742 [6] NCCL INFO Channel 01/0 : 6[c8000] -> 7[cb000] via P2P/IPC/read +gpuc01:65679:65742 [6] NCCL INFO Connected all rings +gpuc01:65679:65742 [6] NCCL INFO Channel 00/0 : 6[c8000] -> 5[8b000] via P2P/IPC/read +gpuc01:65679:65742 [6] NCCL INFO Channel 01/0 : 6[c8000] -> 5[8b000] via P2P/IPC/read +gpuc01:65679:65742 [6] NCCL INFO Connected all trees +gpuc01:65679:65742 [6] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65679:65742 [6] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65679:65742 [6] NCCL INFO comm 0x5ef0c670 rank 6 nranks 16 cudaDev 6 busId c8000 - Init COMPLETE +gpuc01:65676:65676 [3] NCCL INFO cudaDriverVersion 12020 +gpuc01:65676:65676 [3] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65676:65676 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65676:65748 [3] NCCL INFO NET/IB : No device found. 
+gpuc01:65676:65748 [3] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65676:65748 [3] NCCL INFO Using network Socket +gpuc01:65676:65748 [3] NCCL INFO Setting affinity for GPU 3 to ffff0000 +gpuc01:65676:65748 [3] NCCL INFO Trees [0] 4/-1/-1->3->2 [1] 4/-1/-1->3->2 +gpuc01:65676:65748 [3] NCCL INFO Channel 00/0 : 3[4c000] -> 4[88000] via P2P/IPC/read +gpuc01:65676:65748 [3] NCCL INFO Channel 01/0 : 3[4c000] -> 4[88000] via P2P/IPC/read +gpuc01:65676:65748 [3] NCCL INFO Connected all rings +gpuc01:65676:65748 [3] NCCL INFO Channel 00/0 : 3[4c000] -> 2[48000] via P2P/IPC/read +gpuc01:65676:65748 [3] NCCL INFO Channel 01/0 : 3[4c000] -> 2[48000] via P2P/IPC/read +gpuc01:65676:65748 [3] NCCL INFO Connected all trees +gpuc01:65676:65748 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65676:65748 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65676:65748 [3] NCCL INFO comm 0x77358280 rank 3 nranks 16 cudaDev 3 busId 4c000 - Init COMPLETE +gpuc01:65677:65677 [4] NCCL INFO cudaDriverVersion 12020 +gpuc01:65677:65677 [4] NCCL INFO Bootstrap : Using eth0:172.28.23.201<0> +gpuc01:65677:65677 [4] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc01:65677:65745 [4] NCCL INFO NET/IB : No device found. 
+gpuc01:65677:65745 [4] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.201<0> [1]hsn0:141.142.145.201<0> +gpuc01:65677:65745 [4] NCCL INFO Using network Socket +gpuc01:65677:65745 [4] NCCL INFO Setting affinity for GPU 4 to ffff0000,00000000,00000000,00000000 +gpuc01:65677:65745 [4] NCCL INFO Trees [0] 5/-1/-1->4->3 [1] 5/-1/-1->4->3 +gpuc01:65677:65745 [4] NCCL INFO Channel 00/0 : 4[88000] -> 5[8b000] via P2P/IPC/read +gpuc01:65677:65745 [4] NCCL INFO Channel 01/0 : 4[88000] -> 5[8b000] via P2P/IPC/read +gpuc01:65677:65745 [4] NCCL INFO Connected all rings +gpuc01:65677:65745 [4] NCCL INFO Channel 00/0 : 4[88000] -> 3[4c000] via P2P/IPC/read +gpuc01:65677:65745 [4] NCCL INFO Channel 01/0 : 4[88000] -> 3[4c000] via P2P/IPC/read +gpuc01:65677:65745 [4] NCCL INFO Connected all trees +gpuc01:65677:65745 [4] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc01:65677:65745 [4] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc01:65677:65745 [4] NCCL INFO comm 0x60f79500 rank 4 nranks 16 cudaDev 4 busId 88000 - Init COMPLETE +gpuc02:64033:64033 [2] NCCL INFO cudaDriverVersion 12020 +gpuc02:64033:64033 [2] NCCL INFO Bootstrap : Using eth0:172.28.23.202<0> +gpuc02:64033:64033 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpuc02:64033:64099 [2] NCCL INFO NET/IB : No device found. 
+gpuc02:64033:64099 [2] NCCL INFO NET/Socket : Using [0]eth0:172.28.23.202<0> [1]hsn0:141.142.145.202<0> +gpuc02:64033:64099 [2] NCCL INFO Using network Socket +gpuc02:64033:64099 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpuc02:64033:64099 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpuc02:64033:64099 [2] NCCL INFO Channel 00/0 : 10[48000] -> 11[4c000] via P2P/IPC/read +gpuc02:64033:64099 [2] NCCL INFO Channel 01/0 : 10[48000] -> 11[4c000] via P2P/IPC/read +gpuc02:64033:64099 [2] NCCL INFO Connected all rings +gpuc02:64033:64099 [2] NCCL INFO Channel 00/0 : 10[48000] -> 9[b000] via P2P/IPC/read +gpuc02:64033:64099 [2] NCCL INFO Channel 01/0 : 10[48000] -> 9[b000] via P2P/IPC/read +gpuc02:64033:64099 [2] NCCL INFO Connected all trees +gpuc02:64033:64099 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpuc02:64033:64099 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpuc02:64033:64099 [2] NCCL INFO comm 0x78ed87d0 rank 10 nranks 16 cudaDev 2 busId 48000 - Init COMPLETE +[gpuc01:0/16] 2024-01-23 14:57:23,789 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpuc01:0/16] 2024-01-23 14:59:49,879 (trainer:737) INFO: 6epoch:train:1-100batch: iter_time=1.383, forward_time=0.123, loss_ctc=74.653, loss_att=70.242, acc=0.635, loss=71.565, backward_time=0.143, grad_norm=30.045, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.471e-04, train_time=5.936 +[gpuc01:0/16] 2024-01-23 15:02:16,338 (trainer:737) INFO: 6epoch:train:101-200batch: iter_time=1.713e-04, forward_time=0.115, loss_ctc=72.379, loss_att=72.675, acc=0.618, loss=72.586, backward_time=0.137, grad_norm=27.565, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.470e-04, train_time=2.930 +[gpuc01:0/16] 2024-01-23 15:04:42,758 (trainer:737) INFO: 6epoch:train:201-300batch: iter_time=1.720e-04, forward_time=0.116, loss_ctc=78.465, loss_att=81.089, acc=0.606, loss=80.302, backward_time=0.138, grad_norm=29.067, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.468e-04, train_time=2.928 +[gpuc01:0/16] 2024-01-23 15:07:08,423 (trainer:737) INFO: 6epoch:train:301-400batch: iter_time=1.680e-04, forward_time=0.116, loss_ctc=75.745, loss_att=67.448, acc=0.622, loss=69.937, backward_time=0.135, grad_norm=26.802, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.467e-04, train_time=2.913 +[gpuc01:0/16] 2024-01-23 15:09:34,602 (trainer:737) INFO: 6epoch:train:401-500batch: iter_time=1.786e-04, forward_time=0.115, loss_ctc=77.316, loss_att=70.605, acc=0.627, loss=72.619, backward_time=0.135, grad_norm=29.122, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.465e-04, train_time=2.923 +[gpuc01:0/16] 2024-01-23 15:12:01,748 (trainer:737) INFO: 6epoch:train:501-600batch: iter_time=1.706e-04, forward_time=0.115, loss_ctc=68.813, loss_att=70.225, acc=0.625, loss=69.802, backward_time=0.135, grad_norm=26.969, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.464e-04, train_time=2.943 +[gpuc01:0/16] 2024-01-23 15:14:27,895 (trainer:737) INFO: 
6epoch:train:601-700batch: iter_time=1.771e-04, forward_time=0.115, loss_ctc=68.369, loss_att=69.981, acc=0.605, loss=69.497, backward_time=0.135, grad_norm=24.312, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.462e-04, train_time=2.923 +[gpuc01:0/16] 2024-01-23 15:16:53,082 (trainer:737) INFO: 6epoch:train:701-800batch: iter_time=1.895e-04, forward_time=0.115, loss_ctc=69.461, loss_att=67.086, acc=0.628, loss=67.799, backward_time=0.135, grad_norm=27.404, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.461e-04, train_time=2.904 +[gpuc01:0/16] 2024-01-23 15:19:20,282 (trainer:737) INFO: 6epoch:train:801-900batch: iter_time=1.770e-04, forward_time=0.115, loss_ctc=86.466, loss_att=72.725, acc=0.627, loss=76.847, backward_time=0.136, grad_norm=33.478, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.459e-04, train_time=2.944 +[gpuc01:0/16] 2024-01-23 15:21:48,062 (trainer:737) INFO: 6epoch:train:901-1000batch: iter_time=1.699e-04, forward_time=0.115, loss_ctc=72.813, loss_att=65.988, acc=0.630, loss=68.035, backward_time=0.135, grad_norm=27.229, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.065, optim0_lr0=4.458e-04, train_time=2.955 +[gpuc01:0/16] 2024-01-23 15:24:15,401 (trainer:737) INFO: 6epoch:train:1001-1100batch: iter_time=1.882e-04, forward_time=0.115, loss_ctc=74.061, loss_att=66.214, acc=0.636, loss=68.568, backward_time=0.135, grad_norm=33.650, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.457e-04, train_time=2.947 +[gpuc01:0/16] 2024-01-23 15:26:42,699 (trainer:737) INFO: 6epoch:train:1101-1200batch: iter_time=1.613e-04, forward_time=0.115, loss_ctc=75.201, loss_att=67.062, acc=0.624, loss=69.503, backward_time=0.135, grad_norm=29.480, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.455e-04, train_time=2.946 +[gpuc01:0/16] 2024-01-23 15:29:10,795 (trainer:737) INFO: 6epoch:train:1201-1300batch: iter_time=1.847e-04, 
forward_time=0.115, loss_ctc=75.204, loss_att=67.473, acc=0.620, loss=69.793, backward_time=0.137, grad_norm=31.284, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.454e-04, train_time=2.962 +[gpuc01:0/16] 2024-01-23 15:31:38,951 (trainer:737) INFO: 6epoch:train:1301-1400batch: iter_time=1.740e-04, forward_time=0.116, loss_ctc=71.496, loss_att=68.384, acc=0.642, loss=69.317, backward_time=0.135, grad_norm=28.104, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.452e-04, train_time=2.963 +[gpuc01:0/16] 2024-01-23 15:34:06,622 (trainer:737) INFO: 6epoch:train:1401-1500batch: iter_time=1.652e-04, forward_time=0.114, loss_ctc=57.174, loss_att=52.343, acc=0.628, loss=53.792, backward_time=0.135, grad_norm=24.659, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.065, optim0_lr0=4.451e-04, train_time=2.953 +[gpuc01:0/16] 2024-01-23 15:36:34,695 (trainer:737) INFO: 6epoch:train:1501-1600batch: iter_time=1.629e-04, forward_time=0.118, loss_ctc=94.054, loss_att=79.139, acc=0.616, loss=83.613, backward_time=0.136, grad_norm=34.040, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.065, optim0_lr0=4.449e-04, train_time=2.961 +[gpuc01:0/16] 2024-01-23 15:39:01,288 (trainer:737) INFO: 6epoch:train:1601-1700batch: iter_time=1.740e-04, forward_time=0.116, loss_ctc=73.266, loss_att=78.532, acc=0.616, loss=76.952, backward_time=0.136, grad_norm=31.987, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.065, optim0_lr0=4.448e-04, train_time=2.932 +[gpuc01:0/16] 2024-01-23 15:41:28,084 (trainer:737) INFO: 6epoch:train:1701-1800batch: iter_time=1.792e-04, forward_time=0.115, loss_ctc=71.174, loss_att=69.177, acc=0.617, loss=69.776, backward_time=0.136, grad_norm=25.165, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.446e-04, train_time=2.936 +[gpuc01:0/16] 2024-01-23 15:43:56,227 (trainer:737) INFO: 6epoch:train:1801-1900batch: iter_time=1.925e-04, forward_time=0.116, loss_ctc=76.690, loss_att=78.679, 
acc=0.618, loss=78.082, backward_time=0.136, grad_norm=28.880, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.445e-04, train_time=2.963 +[gpuc01:0/16] 2024-01-23 15:46:25,348 (trainer:737) INFO: 6epoch:train:1901-2000batch: iter_time=1.642e-04, forward_time=0.116, loss_ctc=91.833, loss_att=83.458, acc=0.631, loss=85.971, backward_time=0.136, grad_norm=33.296, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.066, optim0_lr0=4.443e-04, train_time=2.982 +[gpuc01:0/16] 2024-01-23 15:48:54,208 (trainer:737) INFO: 6epoch:train:2001-2100batch: iter_time=1.726e-04, forward_time=0.116, loss_ctc=65.789, loss_att=60.476, acc=0.646, loss=62.070, backward_time=0.135, grad_norm=24.606, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.442e-04, train_time=2.977 +[gpuc01:0/16] 2024-01-23 15:51:22,822 (trainer:737) INFO: 6epoch:train:2101-2200batch: iter_time=2.012e-04, forward_time=0.116, loss_ctc=74.956, loss_att=78.386, acc=0.608, loss=77.357, backward_time=0.136, grad_norm=29.327, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.440e-04, train_time=2.972 +[gpuc01:0/16] 2024-01-23 15:53:48,367 (trainer:737) INFO: 6epoch:train:2201-2300batch: iter_time=1.613e-04, forward_time=0.115, loss_ctc=69.821, loss_att=69.313, acc=0.625, loss=69.465, backward_time=0.135, grad_norm=29.655, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.439e-04, train_time=2.911 +[gpuc01:0/16] 2024-01-23 15:56:14,694 (trainer:737) INFO: 6epoch:train:2301-2400batch: iter_time=1.675e-04, forward_time=0.116, loss_ctc=66.843, loss_att=58.541, acc=0.634, loss=61.032, backward_time=0.135, grad_norm=26.631, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.437e-04, train_time=2.926 +[gpuc01:0/16] 2024-01-23 15:58:42,524 (trainer:737) INFO: 6epoch:train:2401-2500batch: iter_time=1.576e-04, forward_time=0.117, loss_ctc=81.396, loss_att=64.416, acc=0.637, loss=69.510, backward_time=0.135, 
grad_norm=34.065, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.436e-04, train_time=2.956 +[gpuc01:0/16] 2024-01-23 15:58:45,249 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpuc01:0/16] 2024-01-23 15:59:04,162 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 15:59:07,816 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 15:59:07,816 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpuc01:0/16] 2024-01-23 15:59:07,822 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 16:03:34,924 (trainer:737) INFO: 6epoch:train:2501-2600batch: iter_time=1.435, forward_time=0.119, loss_ctc=71.458, loss_att=70.294, acc=0.645, loss=70.643, backward_time=0.137, grad_norm=28.610, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.069, optim0_lr0=4.435e-04, train_time=5.848 +[gpuc01:0/16] 2024-01-23 16:06:02,291 (trainer:737) INFO: 6epoch:train:2601-2700batch: iter_time=2.047e-04, forward_time=0.117, loss_ctc=69.242, loss_att=73.015, acc=0.633, loss=71.883, backward_time=0.135, grad_norm=25.418, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.433e-04, train_time=2.947 +[gpuc01:0/16] 2024-01-23 16:08:30,278 (trainer:737) INFO: 6epoch:train:2701-2800batch: iter_time=2.227e-04, forward_time=0.118, loss_ctc=76.335, loss_att=82.696, acc=0.613, 
loss=80.788, backward_time=0.136, grad_norm=28.460, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.432e-04, train_time=2.960 +[gpuc01:0/16] 2024-01-23 16:10:57,719 (trainer:737) INFO: 6epoch:train:2801-2900batch: iter_time=2.494e-04, forward_time=0.117, loss_ctc=73.980, loss_att=73.561, acc=0.621, loss=73.686, backward_time=0.135, grad_norm=29.260, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.430e-04, train_time=2.949 +[gpuc01:0/16] 2024-01-23 16:13:25,077 (trainer:737) INFO: 6epoch:train:2901-3000batch: iter_time=2.202e-04, forward_time=0.116, loss_ctc=74.396, loss_att=69.403, acc=0.634, loss=70.901, backward_time=0.135, grad_norm=28.721, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.429e-04, train_time=2.947 +[gpuc01:0/16] 2024-01-23 16:15:53,687 (trainer:737) INFO: 6epoch:train:3001-3100batch: iter_time=2.313e-04, forward_time=0.116, loss_ctc=67.601, loss_att=72.208, acc=0.632, loss=70.826, backward_time=0.136, grad_norm=25.799, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.427e-04, train_time=2.972 +[gpuc01:0/16] 2024-01-23 16:18:24,129 (trainer:737) INFO: 6epoch:train:3101-3200batch: iter_time=2.464e-04, forward_time=0.115, loss_ctc=65.864, loss_att=67.530, acc=0.627, loss=67.031, backward_time=0.135, grad_norm=24.290, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.426e-04, train_time=3.009 +[gpuc01:0/16] 2024-01-23 16:20:54,396 (trainer:737) INFO: 6epoch:train:3201-3300batch: iter_time=2.427e-04, forward_time=0.116, loss_ctc=65.993, loss_att=65.479, acc=0.638, loss=65.633, backward_time=0.135, grad_norm=28.206, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.424e-04, train_time=3.005 +[gpuc01:0/16] 2024-01-23 16:23:23,434 (trainer:737) INFO: 6epoch:train:3301-3400batch: iter_time=2.369e-04, forward_time=0.117, loss_ctc=83.603, loss_att=73.199, acc=0.633, loss=76.320, backward_time=0.136, grad_norm=32.179, 
clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.423e-04, train_time=2.981 +[gpuc01:0/16] 2024-01-23 16:25:51,795 (trainer:737) INFO: 6epoch:train:3401-3500batch: iter_time=2.196e-04, forward_time=0.117, loss_ctc=71.827, loss_att=65.457, acc=0.649, loss=67.368, backward_time=0.136, grad_norm=27.118, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.422e-04, train_time=2.967 +[gpuc01:0/16] 2024-01-23 16:28:20,715 (trainer:737) INFO: 6epoch:train:3501-3600batch: iter_time=2.406e-04, forward_time=0.118, loss_ctc=72.381, loss_att=66.318, acc=0.646, loss=68.137, backward_time=0.136, grad_norm=31.576, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.420e-04, train_time=2.978 +[gpuc01:0/16] 2024-01-23 16:30:49,104 (trainer:737) INFO: 6epoch:train:3601-3700batch: iter_time=2.289e-04, forward_time=0.117, loss_ctc=72.575, loss_att=65.559, acc=0.644, loss=67.663, backward_time=0.136, grad_norm=28.286, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.419e-04, train_time=2.968 +[gpuc01:0/16] 2024-01-23 16:33:17,783 (trainer:737) INFO: 6epoch:train:3701-3800batch: iter_time=2.186e-04, forward_time=0.117, loss_ctc=71.924, loss_att=66.156, acc=0.636, loss=67.887, backward_time=0.136, grad_norm=27.407, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.417e-04, train_time=2.973 +[gpuc01:0/16] 2024-01-23 16:35:46,042 (trainer:737) INFO: 6epoch:train:3801-3900batch: iter_time=1.967e-04, forward_time=0.117, loss_ctc=70.588, loss_att=68.497, acc=0.652, loss=69.125, backward_time=0.136, grad_norm=28.114, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.416e-04, train_time=2.965 +[gpuc01:0/16] 2024-01-23 16:38:15,662 (trainer:737) INFO: 6epoch:train:3901-4000batch: iter_time=2.179e-04, forward_time=0.116, loss_ctc=55.168, loss_att=50.960, acc=0.641, loss=52.222, backward_time=0.135, grad_norm=22.197, clip=100.000, loss_scale=1.801e+16, 
optim_step_time=0.067, optim0_lr0=4.414e-04, train_time=2.992 +[gpuc01:0/16] 2024-01-23 16:40:44,557 (trainer:737) INFO: 6epoch:train:4001-4100batch: iter_time=2.201e-04, forward_time=0.117, loss_ctc=90.259, loss_att=77.413, acc=0.627, loss=81.267, backward_time=0.136, grad_norm=33.404, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.413e-04, train_time=2.978 +[gpuc01:0/16] 2024-01-23 16:43:13,908 (trainer:737) INFO: 6epoch:train:4101-4200batch: iter_time=2.225e-04, forward_time=0.124, loss_ctc=69.846, loss_att=76.457, acc=0.631, loss=74.474, backward_time=0.136, grad_norm=27.533, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.411e-04, train_time=2.987 +[gpuc01:0/16] 2024-01-23 16:45:42,061 (trainer:737) INFO: 6epoch:train:4201-4300batch: iter_time=1.881e-04, forward_time=0.117, loss_ctc=69.164, loss_att=68.089, acc=0.639, loss=68.412, backward_time=0.137, grad_norm=25.316, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.410e-04, train_time=2.963 +[gpuc01:0/16] 2024-01-23 16:48:12,398 (trainer:737) INFO: 6epoch:train:4301-4400batch: iter_time=1.763e-04, forward_time=0.117, loss_ctc=74.526, loss_att=79.521, acc=0.629, loss=78.023, backward_time=0.136, grad_norm=28.993, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.409e-04, train_time=3.007 +[gpuc01:0/16] 2024-01-23 16:50:42,383 (trainer:737) INFO: 6epoch:train:4401-4500batch: iter_time=2.065e-04, forward_time=0.119, loss_ctc=90.095, loss_att=86.794, acc=0.632, loss=87.784, backward_time=0.136, grad_norm=33.667, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.407e-04, train_time=2.999 +[gpuc01:0/16] 2024-01-23 16:53:12,180 (trainer:737) INFO: 6epoch:train:4501-4600batch: iter_time=1.961e-04, forward_time=0.117, loss_ctc=64.873, loss_att=60.529, acc=0.657, loss=61.832, backward_time=0.135, grad_norm=24.319, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.406e-04, 
train_time=2.996 +[gpuc01:0/16] 2024-01-23 16:55:41,725 (trainer:737) INFO: 6epoch:train:4601-4700batch: iter_time=1.999e-04, forward_time=0.118, loss_ctc=72.953, loss_att=78.886, acc=0.622, loss=77.106, backward_time=0.137, grad_norm=28.267, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.404e-04, train_time=2.991 +[gpuc01:0/16] 2024-01-23 16:58:10,195 (trainer:737) INFO: 6epoch:train:4701-4800batch: iter_time=1.766e-04, forward_time=0.117, loss_ctc=67.949, loss_att=64.983, acc=0.643, loss=65.873, backward_time=0.136, grad_norm=26.353, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.403e-04, train_time=2.969 +[gpuc01:0/16] 2024-01-23 17:00:38,954 (trainer:737) INFO: 6epoch:train:4801-4900batch: iter_time=1.823e-04, forward_time=0.117, loss_ctc=65.472, loss_att=57.452, acc=0.640, loss=59.858, backward_time=0.135, grad_norm=25.391, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.402e-04, train_time=2.975 +[gpuc01:0/16] 2024-01-23 17:03:10,671 (trainer:737) INFO: 6epoch:train:4901-5000batch: iter_time=1.820e-04, forward_time=0.117, loss_ctc=77.971, loss_att=63.545, acc=0.644, loss=67.873, backward_time=0.136, grad_norm=36.471, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.400e-04, train_time=3.034 +[gpuc01:0/16] 2024-01-23 17:03:13,862 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpuc01:0/16] 2024-01-23 17:03:32,685 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 17:03:36,221 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 17:03:36,222 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpuc01:0/16] 2024-01-23 17:03:36,228 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 17:07:59,427 (trainer:737) INFO: 6epoch:train:5001-5100batch: iter_time=1.419, forward_time=0.117, loss_ctc=70.875, loss_att=67.986, acc=0.644, loss=68.852, backward_time=0.135, grad_norm=29.054, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.399e-04, train_time=5.775 +[gpuc01:0/16] 2024-01-23 17:10:27,650 (trainer:737) INFO: 6epoch:train:5101-5200batch: iter_time=2.101e-04, forward_time=0.116, loss_ctc=68.212, loss_att=69.807, acc=0.638, loss=69.328, backward_time=0.136, grad_norm=27.528, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.397e-04, train_time=2.964 +[gpuc01:0/16] 2024-01-23 17:12:54,393 (trainer:737) INFO: 6epoch:train:5201-5300batch: iter_time=2.030e-04, forward_time=0.118, loss_ctc=74.777, loss_att=79.448, acc=0.616, loss=78.047, backward_time=0.136, grad_norm=29.070, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.396e-04, train_time=2.935 +[gpuc01:0/16] 2024-01-23 17:15:21,307 (trainer:737) INFO: 
6epoch:train:5301-5400batch: iter_time=1.961e-04, forward_time=0.117, loss_ctc=72.200, loss_att=66.364, acc=0.627, loss=68.115, backward_time=0.135, grad_norm=29.377, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.394e-04, train_time=2.938 +[gpuc01:0/16] 2024-01-23 17:17:48,942 (trainer:737) INFO: 6epoch:train:5401-5500batch: iter_time=1.847e-04, forward_time=0.117, loss_ctc=73.759, loss_att=68.213, acc=0.636, loss=69.877, backward_time=0.135, grad_norm=29.575, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.393e-04, train_time=2.952 +[gpuc01:0/16] 2024-01-23 17:20:16,382 (trainer:737) INFO: 6epoch:train:5501-5600batch: iter_time=1.721e-04, forward_time=0.117, loss_ctc=66.342, loss_att=67.419, acc=0.634, loss=67.096, backward_time=0.135, grad_norm=25.997, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.392e-04, train_time=2.949 +[gpuc01:0/16] 2024-01-23 17:22:45,716 (trainer:737) INFO: 6epoch:train:5601-5700batch: iter_time=1.773e-04, forward_time=0.116, loss_ctc=65.241, loss_att=66.963, acc=0.615, loss=66.446, backward_time=0.135, grad_norm=23.002, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.066, optim0_lr0=4.390e-04, train_time=2.986 +[gpuc01:0/16] 2024-01-23 17:25:12,939 (trainer:737) INFO: 6epoch:train:5701-5800batch: iter_time=1.860e-04, forward_time=0.116, loss_ctc=64.746, loss_att=63.643, acc=0.639, loss=63.974, backward_time=0.135, grad_norm=26.970, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.389e-04, train_time=2.944 +[gpuc01:0/16] 2024-01-23 17:27:41,326 (trainer:737) INFO: 6epoch:train:5801-5900batch: iter_time=1.928e-04, forward_time=0.117, loss_ctc=82.646, loss_att=68.726, acc=0.638, loss=72.902, backward_time=0.135, grad_norm=33.145, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.387e-04, train_time=2.968 +[gpuc01:0/16] 2024-01-23 17:30:08,690 (trainer:737) INFO: 6epoch:train:5901-6000batch: iter_time=2.103e-04, 
forward_time=0.116, loss_ctc=71.723, loss_att=64.160, acc=0.639, loss=66.429, backward_time=0.135, grad_norm=29.370, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.067, optim0_lr0=4.386e-04, train_time=2.947 +[gpuc01:0/16] 2024-01-23 17:32:34,616 (trainer:737) INFO: 6epoch:train:6001-6100batch: iter_time=2.045e-04, forward_time=0.117, loss_ctc=70.042, loss_att=63.635, acc=0.645, loss=65.557, backward_time=0.135, grad_norm=29.497, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.385e-04, train_time=2.918 +[gpuc01:0/16] 2024-01-23 17:35:00,797 (trainer:737) INFO: 6epoch:train:6101-6200batch: iter_time=2.069e-04, forward_time=0.117, loss_ctc=70.094, loss_att=64.049, acc=0.640, loss=65.862, backward_time=0.135, grad_norm=30.107, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.383e-04, train_time=2.923 +[gpuc01:0/16] 2024-01-23 17:37:26,784 (trainer:737) INFO: 6epoch:train:6201-6300batch: iter_time=2.188e-04, forward_time=0.117, loss_ctc=71.029, loss_att=65.074, acc=0.628, loss=66.860, backward_time=0.135, grad_norm=27.618, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.382e-04, train_time=2.920 +[gpuc01:0/16] 2024-01-23 17:39:54,411 (trainer:737) INFO: 6epoch:train:6301-6400batch: iter_time=1.996e-04, forward_time=0.117, loss_ctc=70.174, loss_att=66.533, acc=0.649, loss=67.625, backward_time=0.135, grad_norm=28.313, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.380e-04, train_time=2.952 +[gpuc01:0/16] 2024-01-23 17:42:21,484 (trainer:737) INFO: 6epoch:train:6401-6500batch: iter_time=2.024e-04, forward_time=0.116, loss_ctc=54.584, loss_att=49.842, acc=0.639, loss=51.265, backward_time=0.135, grad_norm=23.620, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.379e-04, train_time=2.941 +[gpuc01:0/16] 2024-01-23 17:44:48,741 (trainer:737) INFO: 6epoch:train:6501-6600batch: iter_time=2.064e-04, forward_time=0.117, loss_ctc=87.995, loss_att=75.092, 
acc=0.628, loss=78.963, backward_time=0.136, grad_norm=32.235, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.378e-04, train_time=2.945 +[gpuc01:0/16] 2024-01-23 17:47:16,017 (trainer:737) INFO: 6epoch:train:6601-6700batch: iter_time=2.067e-04, forward_time=0.117, loss_ctc=69.703, loss_att=76.084, acc=0.624, loss=74.170, backward_time=0.136, grad_norm=27.008, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.376e-04, train_time=2.945 +[gpuc01:0/16] 2024-01-23 17:49:42,716 (trainer:737) INFO: 6epoch:train:6701-6800batch: iter_time=2.062e-04, forward_time=0.116, loss_ctc=69.201, loss_att=67.111, acc=0.627, loss=67.738, backward_time=0.135, grad_norm=25.841, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.375e-04, train_time=2.934 +[gpuc01:0/16] 2024-01-23 17:52:10,190 (trainer:737) INFO: 6epoch:train:6801-6900batch: iter_time=1.943e-04, forward_time=0.118, loss_ctc=73.851, loss_att=77.740, acc=0.624, loss=76.573, backward_time=0.136, grad_norm=28.743, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.373e-04, train_time=2.949 +[gpuc01:0/16] 2024-01-23 17:54:37,533 (trainer:737) INFO: 6epoch:train:6901-7000batch: iter_time=1.773e-04, forward_time=0.117, loss_ctc=87.676, loss_att=81.523, acc=0.640, loss=83.369, backward_time=0.136, grad_norm=31.334, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.372e-04, train_time=2.947 +[gpuc01:0/16] 2024-01-23 17:57:05,604 (trainer:737) INFO: 6epoch:train:7001-7100batch: iter_time=1.738e-04, forward_time=0.119, loss_ctc=63.948, loss_att=58.702, acc=0.653, loss=60.276, backward_time=0.135, grad_norm=23.895, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.371e-04, train_time=2.961 +[gpuc01:0/16] 2024-01-23 17:59:32,877 (trainer:737) INFO: 6epoch:train:7101-7200batch: iter_time=1.697e-04, forward_time=0.117, loss_ctc=72.348, loss_att=76.123, acc=0.613, loss=74.991, backward_time=0.136, 
grad_norm=27.808, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.369e-04, train_time=2.945 +[gpuc01:0/16] 2024-01-23 18:02:00,182 (trainer:737) INFO: 6epoch:train:7201-7300batch: iter_time=1.772e-04, forward_time=0.116, loss_ctc=66.820, loss_att=65.272, acc=0.635, loss=65.736, backward_time=0.135, grad_norm=27.603, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.368e-04, train_time=2.946 +[gpuc01:0/16] 2024-01-23 18:04:28,531 (trainer:737) INFO: 6epoch:train:7301-7400batch: iter_time=1.643e-04, forward_time=0.115, loss_ctc=63.822, loss_att=56.930, acc=0.644, loss=58.997, backward_time=0.134, grad_norm=25.535, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.366e-04, train_time=2.967 +[gpuc01:0/16] 2024-01-23 18:06:56,828 (trainer:737) INFO: 6epoch:train:7401-7500batch: iter_time=1.658e-04, forward_time=0.115, loss_ctc=75.893, loss_att=62.017, acc=0.645, loss=66.180, backward_time=0.135, grad_norm=32.122, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.365e-04, train_time=2.966 +[gpuc01:0/16] 2024-01-23 18:06:59,485 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpuc01:0/16] 2024-01-23 18:07:18,562 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 18:07:22,112 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 18:07:22,112 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpuc01:0/16] 2024-01-23 18:07:22,118 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 18:11:50,561 (trainer:737) INFO: 6epoch:train:7501-7600batch: iter_time=1.401, forward_time=0.114, loss_ctc=69.514, loss_att=65.800, acc=0.650, loss=66.914, backward_time=0.135, grad_norm=29.205, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.364e-04, train_time=5.874 +[gpuc01:0/16] 2024-01-23 18:14:23,981 (trainer:737) INFO: 6epoch:train:7601-7700batch: iter_time=1.693e-04, forward_time=0.115, loss_ctc=67.245, loss_att=67.187, acc=0.640, loss=67.204, backward_time=0.136, grad_norm=26.211, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.362e-04, train_time=3.068 +[gpuc01:0/16] 2024-01-23 18:16:58,413 (trainer:737) INFO: 6epoch:train:7701-7800batch: iter_time=1.592e-04, forward_time=0.117, loss_ctc=74.147, loss_att=76.790, acc=0.619, loss=75.997, backward_time=0.136, grad_norm=26.864, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.361e-04, train_time=3.088 +[gpuc01:0/16] 2024-01-23 18:19:31,625 (trainer:737) INFO: 
6epoch:train:7801-7900batch: iter_time=1.644e-04, forward_time=0.114, loss_ctc=70.880, loss_att=63.745, acc=0.636, loss=65.886, backward_time=0.135, grad_norm=25.699, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.359e-04, train_time=3.064 +[gpuc01:0/16] 2024-01-23 18:22:04,633 (trainer:737) INFO: 6epoch:train:7901-8000batch: iter_time=1.603e-04, forward_time=0.114, loss_ctc=71.959, loss_att=65.805, acc=0.643, loss=67.652, backward_time=0.135, grad_norm=27.725, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.358e-04, train_time=3.060 +[gpuc01:0/16] 2024-01-23 18:24:37,397 (trainer:737) INFO: 6epoch:train:8001-8100batch: iter_time=1.508e-04, forward_time=0.114, loss_ctc=66.314, loss_att=67.382, acc=0.636, loss=67.062, backward_time=0.135, grad_norm=24.624, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.357e-04, train_time=3.055 +[gpuc01:0/16] 2024-01-23 18:27:09,935 (trainer:737) INFO: 6epoch:train:8101-8200batch: iter_time=1.710e-04, forward_time=0.114, loss_ctc=64.812, loss_att=66.557, acc=0.616, loss=66.033, backward_time=0.134, grad_norm=23.642, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.355e-04, train_time=3.051 +[gpuc01:0/16] 2024-01-23 18:29:43,159 (trainer:737) INFO: 6epoch:train:8201-8300batch: iter_time=1.555e-04, forward_time=0.114, loss_ctc=63.993, loss_att=61.027, acc=0.645, loss=61.916, backward_time=0.135, grad_norm=26.180, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.354e-04, train_time=3.064 +[gpuc01:0/16] 2024-01-23 18:32:14,389 (trainer:737) INFO: 6epoch:train:8301-8400batch: iter_time=1.525e-04, forward_time=0.114, loss_ctc=82.546, loss_att=68.479, acc=0.637, loss=72.699, backward_time=0.135, grad_norm=32.336, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.353e-04, train_time=3.024 +[gpuc01:0/16] 2024-01-23 18:34:47,963 (trainer:737) INFO: 6epoch:train:8401-8500batch: iter_time=1.681e-04, 
forward_time=0.114, loss_ctc=69.669, loss_att=62.311, acc=0.646, loss=64.518, backward_time=0.135, grad_norm=25.831, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.351e-04, train_time=3.071 +[gpuc01:0/16] 2024-01-23 18:37:22,952 (trainer:737) INFO: 6epoch:train:8501-8600batch: iter_time=1.493e-04, forward_time=0.114, loss_ctc=69.931, loss_att=62.032, acc=0.651, loss=64.401, backward_time=0.135, grad_norm=32.689, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.350e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-23 18:39:56,495 (trainer:737) INFO: 6epoch:train:8601-8700batch: iter_time=1.480e-04, forward_time=0.114, loss_ctc=69.126, loss_att=62.979, acc=0.641, loss=64.823, backward_time=0.134, grad_norm=26.346, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.348e-04, train_time=3.071 +[gpuc01:0/16] 2024-01-23 18:42:30,193 (trainer:737) INFO: 6epoch:train:8701-8800batch: iter_time=1.509e-04, forward_time=0.114, loss_ctc=70.979, loss_att=63.349, acc=0.633, loss=65.638, backward_time=0.134, grad_norm=29.282, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.347e-04, train_time=3.074 +[gpuc01:0/16] 2024-01-23 18:45:05,521 (trainer:737) INFO: 6epoch:train:8801-8900batch: iter_time=1.463e-04, forward_time=0.115, loss_ctc=67.974, loss_att=65.104, acc=0.653, loss=65.965, backward_time=0.135, grad_norm=27.893, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.346e-04, train_time=3.106 +[gpuc01:0/16] 2024-01-23 18:47:38,990 (trainer:737) INFO: 6epoch:train:8901-9000batch: iter_time=1.514e-04, forward_time=0.114, loss_ctc=53.519, loss_att=49.467, acc=0.641, loss=50.682, backward_time=0.134, grad_norm=22.065, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.344e-04, train_time=3.069 +[gpuc01:0/16] 2024-01-23 18:50:12,987 (trainer:737) INFO: 6epoch:train:9001-9100batch: iter_time=1.381e-04, forward_time=0.116, loss_ctc=87.031, loss_att=73.110, 
acc=0.633, loss=77.286, backward_time=0.135, grad_norm=32.947, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.343e-04, train_time=3.080 +[gpuc01:0/16] 2024-01-23 18:52:46,865 (trainer:737) INFO: 6epoch:train:9101-9200batch: iter_time=1.502e-04, forward_time=0.116, loss_ctc=68.452, loss_att=75.273, acc=0.629, loss=73.227, backward_time=0.135, grad_norm=27.029, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.342e-04, train_time=3.077 +[gpuc01:0/16] 2024-01-23 18:55:20,488 (trainer:737) INFO: 6epoch:train:9201-9300batch: iter_time=1.496e-04, forward_time=0.115, loss_ctc=68.908, loss_att=66.250, acc=0.629, loss=67.047, backward_time=0.134, grad_norm=27.121, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.340e-04, train_time=3.072 +[gpuc01:0/16] 2024-01-23 18:57:53,805 (trainer:737) INFO: 6epoch:train:9301-9400batch: iter_time=1.365e-04, forward_time=0.117, loss_ctc=72.152, loss_att=74.899, acc=0.633, loss=74.075, backward_time=0.135, grad_norm=29.181, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.339e-04, train_time=3.066 +[gpuc01:0/16] 2024-01-23 19:00:27,663 (trainer:737) INFO: 6epoch:train:9401-9500batch: iter_time=1.372e-04, forward_time=0.117, loss_ctc=86.591, loss_att=78.977, acc=0.647, loss=81.261, backward_time=0.136, grad_norm=31.523, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.338e-04, train_time=3.077 +[gpuc01:0/16] 2024-01-23 19:03:01,536 (trainer:737) INFO: 6epoch:train:9501-9600batch: iter_time=1.340e-04, forward_time=0.115, loss_ctc=63.471, loss_att=57.919, acc=0.657, loss=59.585, backward_time=0.135, grad_norm=23.437, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.336e-04, train_time=3.077 +[gpuc01:0/16] 2024-01-23 19:05:35,775 (trainer:737) INFO: 6epoch:train:9601-9700batch: iter_time=1.399e-04, forward_time=0.119, loss_ctc=71.601, loss_att=74.820, acc=0.620, loss=73.854, backward_time=0.135, 
grad_norm=29.582, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.067, optim0_lr0=4.335e-04, train_time=3.085 +[gpuc01:0/16] 2024-01-23 19:08:09,294 (trainer:737) INFO: 6epoch:train:9701-9800batch: iter_time=1.444e-04, forward_time=0.117, loss_ctc=66.200, loss_att=63.899, acc=0.641, loss=64.589, backward_time=0.135, grad_norm=26.219, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.333e-04, train_time=3.070 +[gpuc01:0/16] 2024-01-23 19:10:43,995 (trainer:737) INFO: 6epoch:train:9801-9900batch: iter_time=1.361e-04, forward_time=0.119, loss_ctc=63.048, loss_att=56.287, acc=0.649, loss=58.315, backward_time=0.134, grad_norm=25.170, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.332e-04, train_time=3.094 +[gpuc01:0/16] 2024-01-23 19:13:17,226 (trainer:737) INFO: 6epoch:train:9901-10000batch: iter_time=3.187e-04, forward_time=0.117, loss_ctc=73.969, loss_att=60.377, acc=0.650, loss=64.454, backward_time=0.135, grad_norm=37.831, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.066, optim0_lr0=4.331e-04, train_time=3.064 +[gpuc01:0/16] 2024-01-23 19:13:19,890 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpuc01:0/16] 2024-01-23 19:13:39,735 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 19:13:43,338 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 19:13:43,338 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpuc01:0/16] 2024-01-23 19:13:43,344 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 19:18:12,612 (trainer:737) INFO: 6epoch:train:10001-10100batch: iter_time=1.392, forward_time=0.116, loss_ctc=68.750, loss_att=67.302, acc=0.659, loss=67.737, backward_time=0.135, grad_norm=31.158, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.329e-04, train_time=5.907 +[gpuc01:0/16] 2024-01-23 19:20:47,320 (trainer:737) INFO: 6epoch:train:10101-10200batch: iter_time=1.705e-04, forward_time=0.116, loss_ctc=67.532, loss_att=70.880, acc=0.642, loss=69.876, backward_time=0.135, grad_norm=26.065, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.328e-04, train_time=3.094 +[gpuc01:0/16] 2024-01-23 19:23:21,960 (trainer:737) INFO: 6epoch:train:10201-10300batch: iter_time=1.694e-04, forward_time=0.116, loss_ctc=72.672, loss_att=78.612, acc=0.627, loss=76.830, backward_time=0.135, grad_norm=27.436, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.327e-04, train_time=3.093 +[gpuc01:0/16] 2024-01-23 19:25:56,416 (trainer:737) INFO: 
6epoch:train:10301-10400batch: iter_time=1.621e-04, forward_time=0.115, loss_ctc=70.345, loss_att=70.247, acc=0.634, loss=70.277, backward_time=0.134, grad_norm=28.640, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.325e-04, train_time=3.089 +[gpuc01:0/16] 2024-01-23 19:28:29,934 (trainer:737) INFO: 6epoch:train:10401-10500batch: iter_time=1.648e-04, forward_time=0.115, loss_ctc=70.841, loss_att=65.623, acc=0.652, loss=67.189, backward_time=0.135, grad_norm=26.826, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.324e-04, train_time=3.070 +[gpuc01:0/16] 2024-01-23 19:31:04,979 (trainer:737) INFO: 6epoch:train:10501-10600batch: iter_time=1.536e-04, forward_time=0.116, loss_ctc=64.431, loss_att=69.794, acc=0.644, loss=68.185, backward_time=0.135, grad_norm=25.719, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.323e-04, train_time=3.101 +[gpuc01:0/16] 2024-01-23 19:33:39,728 (trainer:737) INFO: 6epoch:train:10601-10700batch: iter_time=1.642e-04, forward_time=0.115, loss_ctc=64.570, loss_att=66.330, acc=0.632, loss=65.802, backward_time=0.135, grad_norm=24.967, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.321e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 19:36:14,075 (trainer:737) INFO: 6epoch:train:10701-10800batch: iter_time=1.641e-04, forward_time=0.118, loss_ctc=62.847, loss_att=62.781, acc=0.651, loss=62.800, backward_time=0.134, grad_norm=26.109, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.320e-04, train_time=3.087 +[gpuc01:0/16] 2024-01-23 19:38:48,540 (trainer:737) INFO: 6epoch:train:10801-10900batch: iter_time=1.558e-04, forward_time=0.116, loss_ctc=80.497, loss_att=68.637, acc=0.651, loss=72.195, backward_time=0.135, grad_norm=30.292, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.319e-04, train_time=3.089 +[gpuc01:0/16] 2024-01-23 19:41:22,426 (trainer:737) INFO: 6epoch:train:10901-11000batch: 
iter_time=1.552e-04, forward_time=0.116, loss_ctc=68.990, loss_att=63.077, acc=0.658, loss=64.851, backward_time=0.135, grad_norm=25.825, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.317e-04, train_time=3.078 +[gpuc01:0/16] 2024-01-23 19:43:57,020 (trainer:737) INFO: 6epoch:train:11001-11100batch: iter_time=1.507e-04, forward_time=0.116, loss_ctc=68.767, loss_att=62.718, acc=0.658, loss=64.533, backward_time=0.134, grad_norm=28.878, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.316e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-23 19:46:31,146 (trainer:737) INFO: 6epoch:train:11101-11200batch: iter_time=1.596e-04, forward_time=0.115, loss_ctc=68.856, loss_att=63.708, acc=0.653, loss=65.252, backward_time=0.135, grad_norm=28.358, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.315e-04, train_time=3.082 +[gpuc01:0/16] 2024-01-23 19:49:06,359 (trainer:737) INFO: 6epoch:train:11201-11300batch: iter_time=1.667e-04, forward_time=0.116, loss_ctc=69.760, loss_att=63.714, acc=0.644, loss=65.528, backward_time=0.135, grad_norm=28.401, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.313e-04, train_time=3.104 +[gpuc01:0/16] 2024-01-23 19:51:41,182 (trainer:737) INFO: 6epoch:train:11301-11400batch: iter_time=1.642e-04, forward_time=0.115, loss_ctc=67.051, loss_att=65.617, acc=0.663, loss=66.047, backward_time=0.135, grad_norm=27.318, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.312e-04, train_time=3.096 +[gpuc01:0/16] 2024-01-23 19:54:15,785 (trainer:737) INFO: 6epoch:train:11401-11500batch: iter_time=1.604e-04, forward_time=0.115, loss_ctc=52.710, loss_att=48.353, acc=0.654, loss=49.660, backward_time=0.134, grad_norm=22.518, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.311e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-23 19:56:50,555 (trainer:737) INFO: 6epoch:train:11501-11600batch: iter_time=1.650e-04, forward_time=0.116, 
loss_ctc=86.920, loss_att=75.005, acc=0.639, loss=78.580, backward_time=0.135, grad_norm=32.593, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.309e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 19:59:26,579 (trainer:737) INFO: 6epoch:train:11601-11700batch: iter_time=1.713e-04, forward_time=0.116, loss_ctc=68.121, loss_att=74.579, acc=0.641, loss=72.641, backward_time=0.135, grad_norm=29.025, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.308e-04, train_time=3.120 +[gpuc01:0/16] 2024-01-23 20:02:01,326 (trainer:737) INFO: 6epoch:train:11701-11800batch: iter_time=1.569e-04, forward_time=0.116, loss_ctc=67.359, loss_att=65.966, acc=0.647, loss=66.384, backward_time=0.135, grad_norm=24.960, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.307e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 20:04:35,910 (trainer:737) INFO: 6epoch:train:11801-11900batch: iter_time=1.588e-04, forward_time=0.116, loss_ctc=72.055, loss_att=77.505, acc=0.639, loss=75.870, backward_time=0.135, grad_norm=33.565, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.305e-04, train_time=3.091 +[gpuc01:0/16] 2024-01-23 20:07:10,515 (trainer:737) INFO: 6epoch:train:11901-12000batch: iter_time=1.586e-04, forward_time=0.116, loss_ctc=86.497, loss_att=84.227, acc=0.642, loss=84.908, backward_time=0.135, grad_norm=31.563, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.304e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-23 20:09:45,093 (trainer:737) INFO: 6epoch:train:12001-12100batch: iter_time=1.624e-04, forward_time=0.115, loss_ctc=62.885, loss_att=58.211, acc=0.672, loss=59.613, backward_time=0.134, grad_norm=24.622, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.303e-04, train_time=3.091 +[gpuc01:0/16] 2024-01-23 20:12:20,709 (trainer:737) INFO: 6epoch:train:12101-12200batch: iter_time=1.503e-04, forward_time=0.116, loss_ctc=72.182, loss_att=77.616, 
acc=0.632, loss=75.986, backward_time=0.136, grad_norm=28.879, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.301e-04, train_time=3.112 +[gpuc01:0/16] 2024-01-23 20:14:56,625 (trainer:737) INFO: 6epoch:train:12201-12300batch: iter_time=1.503e-04, forward_time=0.115, loss_ctc=65.588, loss_att=63.151, acc=0.653, loss=63.882, backward_time=0.134, grad_norm=25.898, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.300e-04, train_time=3.118 +[gpuc01:0/16] 2024-01-23 20:17:31,375 (trainer:737) INFO: 6epoch:train:12301-12400batch: iter_time=1.581e-04, forward_time=0.115, loss_ctc=62.684, loss_att=55.332, acc=0.653, loss=57.538, backward_time=0.134, grad_norm=24.064, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.067, optim0_lr0=4.299e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 20:20:06,532 (trainer:737) INFO: 6epoch:train:12401-12500batch: iter_time=1.572e-04, forward_time=0.115, loss_ctc=73.083, loss_att=60.830, acc=0.654, loss=64.506, backward_time=0.134, grad_norm=32.767, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.297e-04, train_time=3.103 +[gpuc01:0/16] 2024-01-23 20:20:09,142 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpuc01:0/16] 2024-01-23 20:20:27,878 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 20:20:33,762 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 20:20:33,762 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpuc01:0/16] 2024-01-23 20:20:33,768 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 20:27:20,466 (trainer:737) INFO: 6epoch:train:12501-12600batch: iter_time=2.714, forward_time=0.178, loss_ctc=68.077, loss_att=64.747, acc=0.664, loss=65.746, backward_time=0.144, grad_norm=28.904, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.072, optim0_lr0=4.296e-04, train_time=8.678 +[gpuc01:0/16] 2024-01-23 20:29:56,210 (trainer:737) INFO: 6epoch:train:12601-12700batch: iter_time=1.511e-04, forward_time=0.116, loss_ctc=66.258, loss_att=67.272, acc=0.651, loss=66.968, backward_time=0.135, grad_norm=26.462, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.295e-04, train_time=3.115 +[gpuc01:0/16] 2024-01-23 20:32:31,999 (trainer:737) INFO: 6epoch:train:12701-12800batch: iter_time=1.532e-04, forward_time=0.117, loss_ctc=72.889, loss_att=76.489, acc=0.633, loss=75.409, backward_time=0.135, grad_norm=28.381, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.293e-04, train_time=3.116 +[gpuc01:0/16] 2024-01-23 20:35:07,320 (trainer:737) INFO: 
6epoch:train:12801-12900batch: iter_time=1.463e-04, forward_time=0.118, loss_ctc=69.419, loss_att=67.805, acc=0.641, loss=68.290, backward_time=0.134, grad_norm=27.583, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.292e-04, train_time=3.106 +[gpuc01:0/16] 2024-01-23 20:37:41,268 (trainer:737) INFO: 6epoch:train:12901-13000batch: iter_time=1.468e-04, forward_time=0.118, loss_ctc=70.421, loss_att=64.820, acc=0.656, loss=66.500, backward_time=0.135, grad_norm=29.032, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.291e-04, train_time=3.079 +[gpuc01:0/16] 2024-01-23 20:40:15,042 (trainer:737) INFO: 6epoch:train:13001-13100batch: iter_time=1.501e-04, forward_time=0.117, loss_ctc=63.901, loss_att=66.939, acc=0.651, loss=66.028, backward_time=0.135, grad_norm=25.580, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.289e-04, train_time=3.075 +[gpuc01:0/16] 2024-01-23 20:42:50,007 (trainer:737) INFO: 6epoch:train:13101-13200batch: iter_time=1.468e-04, forward_time=0.116, loss_ctc=63.011, loss_att=63.885, acc=0.636, loss=63.623, backward_time=0.135, grad_norm=24.577, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.288e-04, train_time=3.099 +[gpuc01:0/16] 2024-01-23 20:45:25,885 (trainer:737) INFO: 6epoch:train:13201-13300batch: iter_time=1.566e-04, forward_time=0.115, loss_ctc=62.372, loss_att=61.825, acc=0.653, loss=61.989, backward_time=0.134, grad_norm=26.327, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.287e-04, train_time=3.117 +[gpuc01:0/16] 2024-01-23 20:48:01,363 (trainer:737) INFO: 6epoch:train:13301-13400batch: iter_time=1.479e-04, forward_time=0.116, loss_ctc=79.097, loss_att=67.727, acc=0.655, loss=71.138, backward_time=0.135, grad_norm=31.131, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.285e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-23 20:50:36,420 (trainer:737) INFO: 6epoch:train:13401-13500batch: 
iter_time=1.422e-04, forward_time=0.116, loss_ctc=68.189, loss_att=61.574, acc=0.663, loss=63.558, backward_time=0.135, grad_norm=27.185, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.284e-04, train_time=3.101 +[gpuc01:0/16] 2024-01-23 20:53:11,383 (trainer:737) INFO: 6epoch:train:13501-13600batch: iter_time=1.457e-04, forward_time=0.116, loss_ctc=67.951, loss_att=60.924, acc=0.664, loss=63.032, backward_time=0.135, grad_norm=27.629, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.283e-04, train_time=3.099 +[gpuc01:0/16] 2024-01-23 20:55:46,410 (trainer:737) INFO: 6epoch:train:13601-13700batch: iter_time=1.418e-04, forward_time=0.116, loss_ctc=67.844, loss_att=61.687, acc=0.659, loss=63.534, backward_time=0.135, grad_norm=26.553, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.282e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-23 20:58:20,880 (trainer:737) INFO: 6epoch:train:13701-13800batch: iter_time=1.464e-04, forward_time=0.116, loss_ctc=69.107, loss_att=62.284, acc=0.648, loss=64.331, backward_time=0.135, grad_norm=28.121, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.280e-04, train_time=3.089 +[gpuc01:0/16] 2024-01-23 21:00:54,528 (trainer:737) INFO: 6epoch:train:13801-13900batch: iter_time=1.428e-04, forward_time=0.116, loss_ctc=66.614, loss_att=64.510, acc=0.665, loss=65.141, backward_time=0.135, grad_norm=26.163, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.279e-04, train_time=3.073 +[gpuc01:0/16] 2024-01-23 21:03:28,038 (trainer:737) INFO: 6epoch:train:13901-14000batch: iter_time=1.513e-04, forward_time=0.116, loss_ctc=52.487, loss_att=47.889, acc=0.658, loss=49.269, backward_time=0.134, grad_norm=21.708, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.066, optim0_lr0=4.278e-04, train_time=3.070 +[gpuc01:0/16] 2024-01-23 21:06:02,920 (trainer:737) INFO: 6epoch:train:14001-14100batch: iter_time=1.542e-04, forward_time=0.117, 
loss_ctc=86.462, loss_att=73.367, acc=0.642, loss=77.296, backward_time=0.135, grad_norm=31.941, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.276e-04, train_time=3.097 +[gpuc01:0/16] 2024-01-23 21:08:37,842 (trainer:737) INFO: 6epoch:train:14101-14200batch: iter_time=1.377e-04, forward_time=0.119, loss_ctc=67.859, loss_att=73.445, acc=0.645, loss=71.769, backward_time=0.135, grad_norm=28.147, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.275e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-23 21:11:11,736 (trainer:737) INFO: 6epoch:train:14201-14300batch: iter_time=1.420e-04, forward_time=0.117, loss_ctc=66.329, loss_att=64.205, acc=0.654, loss=64.842, backward_time=0.135, grad_norm=26.004, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.274e-04, train_time=3.078 +[gpuc01:0/16] 2024-01-23 21:13:45,282 (trainer:737) INFO: 6epoch:train:14301-14400batch: iter_time=1.447e-04, forward_time=0.117, loss_ctc=70.256, loss_att=74.891, acc=0.645, loss=73.501, backward_time=0.136, grad_norm=27.569, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.272e-04, train_time=3.071 +[gpuc01:0/16] 2024-01-23 21:16:20,379 (trainer:737) INFO: 6epoch:train:14401-14500batch: iter_time=1.501e-04, forward_time=0.117, loss_ctc=84.405, loss_att=81.970, acc=0.648, loss=82.700, backward_time=0.135, grad_norm=29.439, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.271e-04, train_time=3.102 +[gpuc01:0/16] 2024-01-23 21:18:54,974 (trainer:737) INFO: 6epoch:train:14501-14600batch: iter_time=1.484e-04, forward_time=0.116, loss_ctc=62.361, loss_att=56.708, acc=0.675, loss=58.404, backward_time=0.135, grad_norm=22.697, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.065, optim0_lr0=4.270e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-23 21:21:30,785 (trainer:737) INFO: 6epoch:train:14601-14700batch: iter_time=1.467e-04, forward_time=0.117, loss_ctc=69.739, loss_att=74.513, 
acc=0.639, loss=73.081, backward_time=0.136, grad_norm=27.319, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.269e-04, train_time=3.116 +[gpuc01:0/16] 2024-01-23 21:24:05,560 (trainer:737) INFO: 6epoch:train:14701-14800batch: iter_time=1.496e-04, forward_time=0.116, loss_ctc=65.151, loss_att=63.425, acc=0.654, loss=63.942, backward_time=0.135, grad_norm=25.838, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.065, optim0_lr0=4.267e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 21:26:40,532 (trainer:737) INFO: 6epoch:train:14801-14900batch: iter_time=1.609e-04, forward_time=0.116, loss_ctc=61.964, loss_att=54.608, acc=0.658, loss=56.815, backward_time=0.134, grad_norm=25.168, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.065, optim0_lr0=4.266e-04, train_time=3.099 +[gpuc01:0/16] 2024-01-23 21:29:17,012 (trainer:737) INFO: 6epoch:train:14901-15000batch: iter_time=1.416e-04, forward_time=0.116, loss_ctc=71.844, loss_att=59.679, acc=0.657, loss=63.329, backward_time=0.135, grad_norm=32.814, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.265e-04, train_time=3.129 +[gpuc01:0/16] 2024-01-23 21:29:31,846 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpuc01:0/16] 2024-01-23 21:29:52,203 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 21:29:55,901 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 21:29:55,901 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpuc01:0/16] 2024-01-23 21:29:55,908 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 21:34:51,866 (trainer:737) INFO: 6epoch:train:15001-15100batch: iter_time=1.765, forward_time=0.116, loss_ctc=67.329, loss_att=65.756, acc=0.656, loss=66.228, backward_time=0.135, grad_norm=27.209, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.263e-04, train_time=6.697 +[gpuc01:0/16] 2024-01-23 21:37:26,053 (trainer:737) INFO: 6epoch:train:15101-15200batch: iter_time=1.554e-04, forward_time=0.116, loss_ctc=66.238, loss_att=68.201, acc=0.649, loss=67.612, backward_time=0.135, grad_norm=25.883, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.262e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-23 21:40:00,779 (trainer:737) INFO: 6epoch:train:15201-15300batch: iter_time=1.385e-04, forward_time=0.118, loss_ctc=71.612, loss_att=75.534, acc=0.627, loss=74.358, backward_time=0.136, grad_norm=27.656, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.261e-04, train_time=3.094 +[gpuc01:0/16] 2024-01-23 21:42:34,740 (trainer:737) INFO: 
6epoch:train:15301-15400batch: iter_time=1.588e-04, forward_time=0.119, loss_ctc=68.629, loss_att=63.842, acc=0.641, loss=65.278, backward_time=0.135, grad_norm=28.482, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.067, optim0_lr0=4.259e-04, train_time=3.079 +[gpuc01:0/16] 2024-01-23 21:45:08,659 (trainer:737) INFO: 6epoch:train:15401-15500batch: iter_time=1.511e-04, forward_time=0.116, loss_ctc=69.983, loss_att=65.667, acc=0.650, loss=66.962, backward_time=0.135, grad_norm=28.133, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.258e-04, train_time=3.078 +[gpuc01:0/16] 2024-01-23 21:47:42,273 (trainer:737) INFO: 6epoch:train:15501-15600batch: iter_time=1.549e-04, forward_time=0.115, loss_ctc=63.467, loss_att=64.650, acc=0.650, loss=64.296, backward_time=0.135, grad_norm=26.181, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.257e-04, train_time=3.072 +[gpuc01:0/16] 2024-01-23 21:50:15,824 (trainer:737) INFO: 6epoch:train:15601-15700batch: iter_time=1.613e-04, forward_time=0.115, loss_ctc=62.967, loss_att=64.599, acc=0.628, loss=64.109, backward_time=0.134, grad_norm=24.418, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.256e-04, train_time=3.071 +[gpuc01:0/16] 2024-01-23 21:52:50,014 (trainer:737) INFO: 6epoch:train:15701-15800batch: iter_time=1.615e-04, forward_time=0.115, loss_ctc=62.285, loss_att=61.124, acc=0.652, loss=61.472, backward_time=0.135, grad_norm=26.219, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.067, optim0_lr0=4.254e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-23 21:55:24,059 (trainer:737) INFO: 6epoch:train:15801-15900batch: iter_time=1.572e-04, forward_time=0.115, loss_ctc=79.641, loss_att=65.672, acc=0.649, loss=69.863, backward_time=0.135, grad_norm=32.109, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.253e-04, train_time=3.081 +[gpuc01:0/16] 2024-01-23 21:57:58,290 (trainer:737) INFO: 6epoch:train:15901-16000batch: 
iter_time=1.430e-04, forward_time=0.115, loss_ctc=67.729, loss_att=61.593, acc=0.654, loss=63.434, backward_time=0.135, grad_norm=24.977, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.252e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-23 22:00:33,217 (trainer:737) INFO: 6epoch:train:16001-16100batch: iter_time=1.504e-04, forward_time=0.115, loss_ctc=68.179, loss_att=61.653, acc=0.656, loss=63.611, backward_time=0.135, grad_norm=30.443, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.067, optim0_lr0=4.250e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-23 22:03:08,453 (trainer:737) INFO: 6epoch:train:16101-16200batch: iter_time=1.471e-04, forward_time=0.115, loss_ctc=68.069, loss_att=61.624, acc=0.651, loss=63.557, backward_time=0.135, grad_norm=28.171, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.249e-04, train_time=3.104 +[gpuc01:0/16] 2024-01-23 22:05:43,314 (trainer:737) INFO: 6epoch:train:16201-16300batch: iter_time=1.322e-04, forward_time=0.116, loss_ctc=68.510, loss_att=62.632, acc=0.638, loss=64.396, backward_time=0.135, grad_norm=27.372, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.248e-04, train_time=3.097 +[gpuc01:0/16] 2024-01-23 22:08:19,336 (trainer:737) INFO: 6epoch:train:16301-16400batch: iter_time=1.461e-04, forward_time=0.117, loss_ctc=66.301, loss_att=63.582, acc=0.664, loss=64.398, backward_time=0.135, grad_norm=27.368, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.065, optim0_lr0=4.247e-04, train_time=3.120 +[gpuc01:0/16] 2024-01-23 22:10:54,471 (trainer:737) INFO: 6epoch:train:16401-16500batch: iter_time=1.311e-04, forward_time=0.115, loss_ctc=51.814, loss_att=47.661, acc=0.653, loss=48.907, backward_time=0.134, grad_norm=21.766, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.245e-04, train_time=3.102 +[gpuc01:0/16] 2024-01-23 22:13:30,566 (trainer:737) INFO: 6epoch:train:16501-16600batch: iter_time=1.367e-04, forward_time=0.116, 
loss_ctc=84.134, loss_att=71.381, acc=0.640, loss=75.207, backward_time=0.135, grad_norm=32.514, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.244e-04, train_time=3.122 +[gpuc01:0/16] 2024-01-23 22:16:04,775 (trainer:737) INFO: 6epoch:train:16601-16700batch: iter_time=1.400e-04, forward_time=0.116, loss_ctc=68.406, loss_att=74.330, acc=0.633, loss=72.553, backward_time=0.135, grad_norm=29.449, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.065, optim0_lr0=4.243e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-23 22:18:38,753 (trainer:737) INFO: 6epoch:train:16701-16800batch: iter_time=1.398e-04, forward_time=0.115, loss_ctc=65.770, loss_att=63.974, acc=0.637, loss=64.513, backward_time=0.134, grad_norm=26.443, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.242e-04, train_time=3.079 +[gpuc01:0/16] 2024-01-23 22:21:13,908 (trainer:737) INFO: 6epoch:train:16801-16900batch: iter_time=1.277e-04, forward_time=0.116, loss_ctc=70.890, loss_att=73.960, acc=0.636, loss=73.039, backward_time=0.135, grad_norm=27.777, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.240e-04, train_time=3.103 +[gpuc01:0/16] 2024-01-23 22:23:49,232 (trainer:737) INFO: 6epoch:train:16901-17000batch: iter_time=1.326e-04, forward_time=0.116, loss_ctc=84.056, loss_att=77.569, acc=0.653, loss=79.515, backward_time=0.135, grad_norm=29.960, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.239e-04, train_time=3.106 +[gpuc01:0/16] 2024-01-23 22:26:23,850 (trainer:737) INFO: 6epoch:train:17001-17100batch: iter_time=1.304e-04, forward_time=0.116, loss_ctc=61.085, loss_att=56.134, acc=0.667, loss=57.619, backward_time=0.134, grad_norm=25.399, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.238e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-23 22:28:58,733 (trainer:737) INFO: 6epoch:train:17101-17200batch: iter_time=1.272e-04, forward_time=0.116, loss_ctc=68.903, loss_att=73.077, 
acc=0.628, loss=71.825, backward_time=0.135, grad_norm=29.096, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.236e-04, train_time=3.097 +[gpuc01:0/16] 2024-01-23 22:31:33,765 (trainer:737) INFO: 6epoch:train:17201-17300batch: iter_time=1.280e-04, forward_time=0.117, loss_ctc=64.566, loss_att=62.015, acc=0.648, loss=62.780, backward_time=0.135, grad_norm=26.473, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.235e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-23 22:34:09,068 (trainer:737) INFO: 6epoch:train:17301-17400batch: iter_time=1.248e-04, forward_time=0.115, loss_ctc=60.808, loss_att=53.315, acc=0.659, loss=55.563, backward_time=0.134, grad_norm=24.605, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.234e-04, train_time=3.106 +[gpuc01:0/16] 2024-01-23 22:36:43,996 (trainer:737) INFO: 6epoch:train:17401-17500batch: iter_time=1.258e-04, forward_time=0.115, loss_ctc=70.227, loss_att=59.180, acc=0.653, loss=62.494, backward_time=0.134, grad_norm=31.451, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.233e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-23 22:36:46,986 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpuc01:0/16] 2024-01-23 22:37:06,264 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 22:37:10,317 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 22:37:10,317 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpuc01:0/16] 2024-01-23 22:37:10,323 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 22:43:33,317 (trainer:737) INFO: 6epoch:train:17501-17600batch: iter_time=2.511, forward_time=0.117, loss_ctc=66.238, loss_att=64.760, acc=0.666, loss=65.203, backward_time=0.135, grad_norm=28.366, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.231e-04, train_time=8.186 +[gpuc01:0/16] 2024-01-23 22:46:09,133 (trainer:737) INFO: 6epoch:train:17601-17700batch: iter_time=1.516e-04, forward_time=0.115, loss_ctc=65.755, loss_att=67.714, acc=0.652, loss=67.126, backward_time=0.135, grad_norm=26.797, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.230e-04, train_time=3.116 +[gpuc01:0/16] 2024-01-23 22:48:44,483 (trainer:737) INFO: 6epoch:train:17701-17800batch: iter_time=1.414e-04, forward_time=0.115, loss_ctc=70.496, loss_att=75.865, acc=0.638, loss=74.254, backward_time=0.136, grad_norm=32.155, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.229e-04, train_time=3.107 +[gpuc01:0/16] 2024-01-23 22:51:19,266 (trainer:737) INFO: 
6epoch:train:17801-17900batch: iter_time=1.381e-04, forward_time=0.115, loss_ctc=67.573, loss_att=67.733, acc=0.643, loss=67.685, backward_time=0.135, grad_norm=27.879, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.228e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 22:53:53,737 (trainer:737) INFO: 6epoch:train:17901-18000batch: iter_time=1.548e-04, forward_time=0.115, loss_ctc=69.172, loss_att=63.396, acc=0.662, loss=65.129, backward_time=0.135, grad_norm=27.753, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.066, optim0_lr0=4.226e-04, train_time=3.089 +[gpuc01:0/16] 2024-01-23 22:56:28,254 (trainer:737) INFO: 6epoch:train:18001-18100batch: iter_time=1.451e-04, forward_time=0.116, loss_ctc=62.958, loss_att=67.139, acc=0.655, loss=65.885, backward_time=0.135, grad_norm=24.761, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.225e-04, train_time=3.090 +[gpuc01:0/16] 2024-01-23 22:59:03,626 (trainer:737) INFO: 6epoch:train:18101-18200batch: iter_time=1.406e-04, forward_time=0.114, loss_ctc=61.898, loss_att=63.055, acc=0.642, loss=62.708, backward_time=0.135, grad_norm=23.362, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.224e-04, train_time=3.107 +[gpuc01:0/16] 2024-01-23 23:01:38,571 (trainer:737) INFO: 6epoch:train:18201-18300batch: iter_time=1.427e-04, forward_time=0.114, loss_ctc=60.961, loss_att=60.648, acc=0.657, loss=60.742, backward_time=0.134, grad_norm=25.605, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.223e-04, train_time=3.099 +[gpuc01:0/16] 2024-01-23 23:04:13,743 (trainer:737) INFO: 6epoch:train:18301-18400batch: iter_time=1.379e-04, forward_time=0.115, loss_ctc=78.378, loss_att=66.653, acc=0.660, loss=70.170, backward_time=0.135, grad_norm=30.985, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.221e-04, train_time=3.103 +[gpuc01:0/16] 2024-01-23 23:06:47,945 (trainer:737) INFO: 6epoch:train:18401-18500batch: 
iter_time=1.433e-04, forward_time=0.115, loss_ctc=67.892, loss_att=60.929, acc=0.667, loss=63.018, backward_time=0.135, grad_norm=26.561, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.220e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-23 23:09:23,947 (trainer:737) INFO: 6epoch:train:18501-18600batch: iter_time=1.607e-04, forward_time=0.115, loss_ctc=67.331, loss_att=60.477, acc=0.671, loss=62.533, backward_time=0.135, grad_norm=30.229, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.219e-04, train_time=3.120 +[gpuc01:0/16] 2024-01-23 23:11:59,390 (trainer:737) INFO: 6epoch:train:18601-18700batch: iter_time=1.566e-04, forward_time=0.115, loss_ctc=66.255, loss_att=60.334, acc=0.662, loss=62.110, backward_time=0.135, grad_norm=26.932, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.218e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-23 23:14:34,605 (trainer:737) INFO: 6epoch:train:18701-18800batch: iter_time=1.994e-04, forward_time=0.115, loss_ctc=68.269, loss_att=61.099, acc=0.655, loss=63.250, backward_time=0.136, grad_norm=28.894, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.216e-04, train_time=3.104 +[gpuc01:0/16] 2024-01-23 23:17:10,347 (trainer:737) INFO: 6epoch:train:18801-18900batch: iter_time=1.460e-04, forward_time=0.116, loss_ctc=64.771, loss_att=63.540, acc=0.672, loss=63.910, backward_time=0.135, grad_norm=28.294, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.215e-04, train_time=3.115 +[gpuc01:0/16] 2024-01-23 23:19:45,096 (trainer:737) INFO: 6epoch:train:18901-19000batch: iter_time=1.559e-04, forward_time=0.115, loss_ctc=51.350, loss_att=47.284, acc=0.661, loss=48.503, backward_time=0.134, grad_norm=22.770, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.214e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 23:22:20,502 (trainer:737) INFO: 6epoch:train:19001-19100batch: iter_time=1.610e-04, forward_time=0.118, 
loss_ctc=84.042, loss_att=71.979, acc=0.648, loss=75.598, backward_time=0.136, grad_norm=33.749, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.213e-04, train_time=3.108 +[gpuc01:0/16] 2024-01-23 23:24:55,243 (trainer:737) INFO: 6epoch:train:19101-19200batch: iter_time=1.530e-04, forward_time=0.116, loss_ctc=66.201, loss_att=72.105, acc=0.650, loss=70.334, backward_time=0.136, grad_norm=26.325, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.211e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 23:27:29,549 (trainer:737) INFO: 6epoch:train:19201-19300batch: iter_time=1.634e-04, forward_time=0.115, loss_ctc=64.971, loss_att=62.742, acc=0.658, loss=63.411, backward_time=0.135, grad_norm=24.590, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.210e-04, train_time=3.086 +[gpuc01:0/16] 2024-01-23 23:30:04,950 (trainer:737) INFO: 6epoch:train:19301-19400batch: iter_time=1.446e-04, forward_time=0.116, loss_ctc=69.989, loss_att=74.619, acc=0.648, loss=73.230, backward_time=0.136, grad_norm=25.994, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.209e-04, train_time=3.108 +[gpuc01:0/16] 2024-01-23 23:32:39,422 (trainer:737) INFO: 6epoch:train:19401-19500batch: iter_time=1.377e-04, forward_time=0.117, loss_ctc=82.612, loss_att=79.880, acc=0.653, loss=80.699, backward_time=0.135, grad_norm=28.814, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.208e-04, train_time=3.089 +[gpuc01:0/16] 2024-01-23 23:35:14,087 (trainer:737) INFO: 6epoch:train:19501-19600batch: iter_time=1.421e-04, forward_time=0.116, loss_ctc=60.458, loss_att=55.427, acc=0.682, loss=56.937, backward_time=0.135, grad_norm=23.208, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.206e-04, train_time=3.093 +[gpuc01:0/16] 2024-01-23 23:37:49,383 (trainer:737) INFO: 6epoch:train:19601-19700batch: iter_time=1.323e-04, forward_time=0.117, loss_ctc=69.010, loss_att=72.959, 
acc=0.644, loss=71.774, backward_time=0.136, grad_norm=29.163, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.205e-04, train_time=3.106 +[gpuc01:0/16] 2024-01-23 23:40:24,140 (trainer:737) INFO: 6epoch:train:19701-19800batch: iter_time=1.386e-04, forward_time=0.116, loss_ctc=64.066, loss_att=61.927, acc=0.659, loss=62.569, backward_time=0.135, grad_norm=25.638, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.204e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 23:42:58,470 (trainer:737) INFO: 6epoch:train:19801-19900batch: iter_time=1.311e-04, forward_time=0.116, loss_ctc=60.158, loss_att=53.761, acc=0.662, loss=55.680, backward_time=0.134, grad_norm=24.077, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.203e-04, train_time=3.086 +[gpuc01:0/16] 2024-01-23 23:45:33,603 (trainer:737) INFO: 6epoch:train:19901-20000batch: iter_time=1.291e-04, forward_time=0.116, loss_ctc=70.576, loss_att=59.007, acc=0.660, loss=62.478, backward_time=0.134, grad_norm=31.583, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.067, optim0_lr0=4.201e-04, train_time=3.102 +[gpuc01:0/16] 2024-01-23 23:45:37,473 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpuc01:0/16] 2024-01-23 23:45:57,559 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-23 23:46:01,378 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-23 23:46:01,378 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpuc01:0/16] 2024-01-23 23:46:01,384 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-23 23:51:52,898 (trainer:737) INFO: 6epoch:train:20001-20100batch: iter_time=2.183, forward_time=0.161, loss_ctc=65.878, loss_att=61.662, acc=0.675, loss=62.927, backward_time=0.156, grad_norm=28.015, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.072, optim0_lr0=4.200e-04, train_time=7.586 +[gpuc01:0/16] 2024-01-23 23:54:27,676 (trainer:737) INFO: 6epoch:train:20101-20200batch: iter_time=1.575e-04, forward_time=0.116, loss_ctc=64.183, loss_att=65.780, acc=0.660, loss=65.301, backward_time=0.135, grad_norm=25.463, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.199e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-23 23:57:03,375 (trainer:737) INFO: 6epoch:train:20201-20300batch: iter_time=1.418e-04, forward_time=0.116, loss_ctc=70.496, loss_att=74.045, acc=0.641, loss=72.981, backward_time=0.135, grad_norm=27.360, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.198e-04, train_time=3.114 +[gpuc01:0/16] 2024-01-23 23:59:37,340 (trainer:737) INFO: 
6epoch:train:20301-20400batch: iter_time=1.423e-04, forward_time=0.116, loss_ctc=67.291, loss_att=65.697, acc=0.648, loss=66.175, backward_time=0.134, grad_norm=27.834, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.196e-04, train_time=3.079 +[gpuc01:0/16] 2024-01-24 00:02:12,268 (trainer:737) INFO: 6epoch:train:20401-20500batch: iter_time=1.348e-04, forward_time=0.119, loss_ctc=67.922, loss_att=62.952, acc=0.662, loss=64.443, backward_time=0.135, grad_norm=28.496, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.195e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-24 00:04:47,529 (trainer:737) INFO: 6epoch:train:20501-20600batch: iter_time=1.385e-04, forward_time=0.116, loss_ctc=62.475, loss_att=65.585, acc=0.658, loss=64.652, backward_time=0.135, grad_norm=25.922, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.194e-04, train_time=3.105 +[gpuc01:0/16] 2024-01-24 00:07:22,962 (trainer:737) INFO: 6epoch:train:20601-20700batch: iter_time=1.372e-04, forward_time=0.115, loss_ctc=61.077, loss_att=62.011, acc=0.649, loss=61.730, backward_time=0.134, grad_norm=22.312, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.193e-04, train_time=3.108 +[gpuc01:0/16] 2024-01-24 00:09:58,006 (trainer:737) INFO: 6epoch:train:20701-20800batch: iter_time=1.403e-04, forward_time=0.115, loss_ctc=60.824, loss_att=60.055, acc=0.659, loss=60.286, backward_time=0.134, grad_norm=27.484, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.192e-04, train_time=3.101 +[gpuc01:0/16] 2024-01-24 00:12:33,691 (trainer:737) INFO: 6epoch:train:20801-20900batch: iter_time=1.364e-04, forward_time=0.115, loss_ctc=78.767, loss_att=66.285, acc=0.659, loss=70.030, backward_time=0.135, grad_norm=33.503, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.190e-04, train_time=3.113 +[gpuc01:0/16] 2024-01-24 00:15:09,060 (trainer:737) INFO: 6epoch:train:20901-21000batch: 
iter_time=1.327e-04, forward_time=0.115, loss_ctc=66.299, loss_att=58.908, acc=0.672, loss=61.125, backward_time=0.135, grad_norm=23.786, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.189e-04, train_time=3.107 +[gpuc01:0/16] 2024-01-24 00:17:44,092 (trainer:737) INFO: 6epoch:train:21001-21100batch: iter_time=1.257e-04, forward_time=0.115, loss_ctc=66.187, loss_att=59.289, acc=0.671, loss=61.358, backward_time=0.134, grad_norm=27.112, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.188e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 00:20:19,365 (trainer:737) INFO: 6epoch:train:21101-21200batch: iter_time=1.362e-04, forward_time=0.116, loss_ctc=65.860, loss_att=59.707, acc=0.667, loss=61.553, backward_time=0.134, grad_norm=26.890, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.187e-04, train_time=3.105 +[gpuc01:0/16] 2024-01-24 00:22:54,881 (trainer:737) INFO: 6epoch:train:21201-21300batch: iter_time=1.284e-04, forward_time=0.115, loss_ctc=67.972, loss_att=60.282, acc=0.657, loss=62.589, backward_time=0.134, grad_norm=29.197, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.185e-04, train_time=3.110 +[gpuc01:0/16] 2024-01-24 00:25:31,069 (trainer:737) INFO: 6epoch:train:21301-21400batch: iter_time=1.287e-04, forward_time=0.116, loss_ctc=64.565, loss_att=62.549, acc=0.676, loss=63.153, backward_time=0.135, grad_norm=27.114, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.184e-04, train_time=3.124 +[gpuc01:0/16] 2024-01-24 00:28:07,121 (trainer:737) INFO: 6epoch:train:21401-21500batch: iter_time=1.314e-04, forward_time=0.115, loss_ctc=50.729, loss_att=47.251, acc=0.663, loss=48.294, backward_time=0.134, grad_norm=22.153, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.183e-04, train_time=3.121 +[gpuc01:0/16] 2024-01-24 00:30:42,853 (trainer:737) INFO: 6epoch:train:21501-21600batch: iter_time=1.373e-04, forward_time=0.115, 
loss_ctc=82.844, loss_att=70.413, acc=0.655, loss=74.142, backward_time=0.135, grad_norm=31.786, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.182e-04, train_time=3.114 +[gpuc01:0/16] 2024-01-24 00:33:18,296 (trainer:737) INFO: 6epoch:train:21601-21700batch: iter_time=1.318e-04, forward_time=0.116, loss_ctc=65.284, loss_att=72.001, acc=0.652, loss=69.986, backward_time=0.135, grad_norm=26.827, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.181e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-24 00:35:55,062 (trainer:737) INFO: 6epoch:train:21701-21800batch: iter_time=1.283e-04, forward_time=0.115, loss_ctc=65.321, loss_att=62.566, acc=0.660, loss=63.393, backward_time=0.135, grad_norm=25.146, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.179e-04, train_time=3.135 +[gpuc01:0/16] 2024-01-24 00:38:31,114 (trainer:737) INFO: 6epoch:train:21801-21900batch: iter_time=1.263e-04, forward_time=0.116, loss_ctc=69.262, loss_att=72.946, acc=0.654, loss=71.841, backward_time=0.135, grad_norm=27.390, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.178e-04, train_time=3.121 +[gpuc01:0/16] 2024-01-24 00:41:07,752 (trainer:737) INFO: 6epoch:train:21901-22000batch: iter_time=1.253e-04, forward_time=0.116, loss_ctc=82.920, loss_att=79.995, acc=0.655, loss=80.873, backward_time=0.136, grad_norm=29.769, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.066, optim0_lr0=4.177e-04, train_time=3.133 +[gpuc01:0/16] 2024-01-24 00:43:43,442 (trainer:737) INFO: 6epoch:train:22001-22100batch: iter_time=1.331e-04, forward_time=0.116, loss_ctc=60.327, loss_att=55.320, acc=0.682, loss=56.822, backward_time=0.135, grad_norm=23.394, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.176e-04, train_time=3.114 +[gpuc01:0/16] 2024-01-24 00:46:19,033 (trainer:737) INFO: 6epoch:train:22101-22200batch: iter_time=1.229e-04, forward_time=0.117, loss_ctc=68.023, loss_att=71.627, 
acc=0.648, loss=70.546, backward_time=0.136, grad_norm=27.268, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.174e-04, train_time=3.112 +[gpuc01:0/16] 2024-01-24 00:48:54,560 (trainer:737) INFO: 6epoch:train:22201-22300batch: iter_time=1.208e-04, forward_time=0.116, loss_ctc=63.101, loss_att=60.153, acc=0.669, loss=61.038, backward_time=0.135, grad_norm=25.710, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.173e-04, train_time=3.110 +[gpuc01:0/16] 2024-01-24 00:51:29,187 (trainer:737) INFO: 6epoch:train:22301-22400batch: iter_time=1.332e-04, forward_time=0.115, loss_ctc=60.221, loss_att=53.565, acc=0.663, loss=55.562, backward_time=0.134, grad_norm=25.203, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.172e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-24 00:54:22,618 (trainer:737) INFO: 6epoch:train:22401-22500batch: iter_time=1.173e-04, forward_time=0.126, loss_ctc=70.649, loss_att=57.735, acc=0.668, loss=61.609, backward_time=0.137, grad_norm=29.854, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.171e-04, train_time=3.468 +[gpuc01:0/16] 2024-01-24 00:54:42,831 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpuc01:0/16] 2024-01-24 00:55:02,173 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 00:55:05,889 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 00:55:05,889 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpuc01:0/16] 2024-01-24 00:55:05,895 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 01:01:59,395 (trainer:737) INFO: 6epoch:train:22501-22600batch: iter_time=2.812, forward_time=0.116, loss_ctc=66.171, loss_att=64.628, acc=0.661, loss=65.091, backward_time=0.135, grad_norm=30.821, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.170e-04, train_time=9.135 +[gpuc01:0/16] 2024-01-24 01:04:35,655 (trainer:737) INFO: 6epoch:train:22601-22700batch: iter_time=1.135e-04, forward_time=0.118, loss_ctc=64.447, loss_att=67.321, acc=0.651, loss=66.459, backward_time=0.135, grad_norm=26.598, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.168e-04, train_time=3.125 +[gpuc01:0/16] 2024-01-24 01:07:11,556 (trainer:737) INFO: 6epoch:train:22701-22800batch: iter_time=1.266e-04, forward_time=0.116, loss_ctc=70.253, loss_att=74.791, acc=0.634, loss=73.430, backward_time=0.135, grad_norm=27.533, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.167e-04, train_time=3.118 +[gpuc01:0/16] 2024-01-24 01:09:45,778 (trainer:737) INFO: 
6epoch:train:22801-22900batch: iter_time=1.401e-04, forward_time=0.115, loss_ctc=66.476, loss_att=61.494, acc=0.650, loss=62.989, backward_time=0.134, grad_norm=28.590, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.166e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-24 01:12:21,434 (trainer:737) INFO: 6epoch:train:22901-23000batch: iter_time=1.400e-04, forward_time=0.115, loss_ctc=67.536, loss_att=63.096, acc=0.657, loss=64.428, backward_time=0.135, grad_norm=27.430, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.067, optim0_lr0=4.165e-04, train_time=3.113 +[gpuc01:0/16] 2024-01-24 01:14:56,933 (trainer:737) INFO: 6epoch:train:23001-23100batch: iter_time=1.323e-04, forward_time=0.115, loss_ctc=61.181, loss_att=63.662, acc=0.654, loss=62.918, backward_time=0.135, grad_norm=24.921, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.164e-04, train_time=3.110 +[gpuc01:0/16] 2024-01-24 01:17:32,443 (trainer:737) INFO: 6epoch:train:23101-23200batch: iter_time=1.357e-04, forward_time=0.115, loss_ctc=60.405, loss_att=63.556, acc=0.629, loss=62.611, backward_time=0.134, grad_norm=24.305, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.162e-04, train_time=3.110 +[gpuc01:0/16] 2024-01-24 01:20:08,895 (trainer:737) INFO: 6epoch:train:23201-23300batch: iter_time=1.350e-04, forward_time=0.115, loss_ctc=60.593, loss_att=59.410, acc=0.658, loss=59.765, backward_time=0.134, grad_norm=25.408, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.161e-04, train_time=3.129 +[gpuc01:0/16] 2024-01-24 01:22:44,729 (trainer:737) INFO: 6epoch:train:23301-23400batch: iter_time=1.392e-04, forward_time=0.115, loss_ctc=76.887, loss_att=64.086, acc=0.657, loss=67.927, backward_time=0.135, grad_norm=32.022, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.160e-04, train_time=3.116 +[gpuc01:0/16] 2024-01-24 01:25:20,179 (trainer:737) INFO: 6epoch:train:23401-23500batch: 
iter_time=1.336e-04, forward_time=0.116, loss_ctc=66.955, loss_att=60.789, acc=0.660, loss=62.639, backward_time=0.135, grad_norm=26.782, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.159e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-24 01:27:55,585 (trainer:737) INFO: 6epoch:train:23501-23600batch: iter_time=1.347e-04, forward_time=0.115, loss_ctc=66.896, loss_att=59.918, acc=0.664, loss=62.011, backward_time=0.134, grad_norm=27.328, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.158e-04, train_time=3.108 +[gpuc01:0/16] 2024-01-24 01:30:30,899 (trainer:737) INFO: 6epoch:train:23601-23700batch: iter_time=1.248e-04, forward_time=0.115, loss_ctc=65.074, loss_att=60.229, acc=0.659, loss=61.682, backward_time=0.135, grad_norm=26.348, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.156e-04, train_time=3.106 +[gpuc01:0/16] 2024-01-24 01:33:05,101 (trainer:737) INFO: 6epoch:train:23701-23800batch: iter_time=1.335e-04, forward_time=0.115, loss_ctc=67.362, loss_att=60.363, acc=0.650, loss=62.463, backward_time=0.134, grad_norm=29.290, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.065, optim0_lr0=4.155e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-24 01:35:41,213 (trainer:737) INFO: 6epoch:train:23801-23900batch: iter_time=1.247e-04, forward_time=0.115, loss_ctc=63.651, loss_att=62.260, acc=0.667, loss=62.677, backward_time=0.135, grad_norm=26.042, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.154e-04, train_time=3.122 +[gpuc01:0/16] 2024-01-24 01:38:17,174 (trainer:737) INFO: 6epoch:train:23901-24000batch: iter_time=1.301e-04, forward_time=0.115, loss_ctc=49.928, loss_att=46.702, acc=0.659, loss=47.670, backward_time=0.134, grad_norm=21.655, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.065, optim0_lr0=4.153e-04, train_time=3.119 +[gpuc01:0/16] 2024-01-24 01:40:53,004 (trainer:737) INFO: 6epoch:train:24001-24100batch: iter_time=1.312e-04, forward_time=0.115, 
loss_ctc=82.737, loss_att=70.259, acc=0.646, loss=74.002, backward_time=0.135, grad_norm=31.477, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.152e-04, train_time=3.116 +[gpuc01:0/16] 2024-01-24 01:43:29,255 (trainer:737) INFO: 6epoch:train:24101-24200batch: iter_time=1.350e-04, forward_time=0.115, loss_ctc=64.696, loss_att=71.255, acc=0.643, loss=69.287, backward_time=0.135, grad_norm=27.576, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.150e-04, train_time=3.125 +[gpuc01:0/16] 2024-01-24 01:46:05,024 (trainer:737) INFO: 6epoch:train:24201-24300batch: iter_time=1.284e-04, forward_time=0.115, loss_ctc=64.682, loss_att=62.498, acc=0.646, loss=63.153, backward_time=0.134, grad_norm=25.652, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.149e-04, train_time=3.115 +[gpuc01:0/16] 2024-01-24 01:48:41,232 (trainer:737) INFO: 6epoch:train:24301-24400batch: iter_time=1.282e-04, forward_time=0.116, loss_ctc=68.243, loss_att=71.299, acc=0.644, loss=70.382, backward_time=0.135, grad_norm=27.879, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.148e-04, train_time=3.124 +[gpuc01:0/16] 2024-01-24 01:51:17,483 (trainer:737) INFO: 6epoch:train:24401-24500batch: iter_time=1.346e-04, forward_time=0.116, loss_ctc=82.720, loss_att=75.653, acc=0.662, loss=77.773, backward_time=0.135, grad_norm=31.123, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.147e-04, train_time=3.125 +[gpuc01:0/16] 2024-01-24 01:53:53,060 (trainer:737) INFO: 6epoch:train:24501-24600batch: iter_time=1.306e-04, forward_time=0.115, loss_ctc=59.742, loss_att=54.806, acc=0.675, loss=56.286, backward_time=0.134, grad_norm=24.838, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.146e-04, train_time=3.111 +[gpuc01:0/16] 2024-01-24 01:56:29,015 (trainer:737) INFO: 6epoch:train:24601-24700batch: iter_time=1.308e-04, forward_time=0.116, loss_ctc=67.227, loss_att=70.291, 
acc=0.635, loss=69.372, backward_time=0.136, grad_norm=27.587, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.065, optim0_lr0=4.145e-04, train_time=3.119 +[gpuc01:0/16] 2024-01-24 01:59:04,463 (trainer:737) INFO: 6epoch:train:24701-24800batch: iter_time=1.287e-04, forward_time=0.116, loss_ctc=62.685, loss_att=61.261, acc=0.656, loss=61.688, backward_time=0.135, grad_norm=25.407, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.143e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-24 02:01:41,128 (trainer:737) INFO: 6epoch:train:24801-24900batch: iter_time=1.292e-04, forward_time=0.119, loss_ctc=59.049, loss_att=52.485, acc=0.662, loss=54.454, backward_time=0.134, grad_norm=24.186, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.142e-04, train_time=3.133 +[gpuc01:0/16] 2024-01-24 02:04:45,117 (trainer:737) INFO: 6epoch:train:24901-25000batch: iter_time=1.220e-04, forward_time=0.242, loss_ctc=69.694, loss_att=56.976, acc=0.666, loss=60.791, backward_time=0.149, grad_norm=30.595, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.074, optim0_lr0=4.141e-04, train_time=3.666 +[gpuc01:0/16] 2024-01-24 02:05:03,619 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpuc01:0/16] 2024-01-24 02:05:22,860 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 02:05:26,480 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 02:05:26,480 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpuc01:0/16] 2024-01-24 02:05:26,487 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 02:11:21,462 (trainer:737) INFO: 6epoch:train:25001-25100batch: iter_time=2.390, forward_time=0.116, loss_ctc=65.598, loss_att=61.703, acc=0.672, loss=62.871, backward_time=0.135, grad_norm=28.508, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.140e-04, train_time=7.941 +[gpuc01:0/16] 2024-01-24 02:13:57,571 (trainer:737) INFO: 6epoch:train:25101-25200batch: iter_time=1.453e-04, forward_time=0.115, loss_ctc=63.577, loss_att=62.314, acc=0.660, loss=62.693, backward_time=0.135, grad_norm=25.073, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.139e-04, train_time=3.122 +[gpuc01:0/16] 2024-01-24 02:16:32,605 (trainer:737) INFO: 6epoch:train:25201-25300batch: iter_time=1.466e-04, forward_time=0.115, loss_ctc=69.201, loss_att=72.547, acc=0.639, loss=71.543, backward_time=0.135, grad_norm=26.459, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.137e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 02:19:07,393 (trainer:737) INFO: 
6epoch:train:25301-25400batch: iter_time=1.305e-04, forward_time=0.115, loss_ctc=66.136, loss_att=60.739, acc=0.651, loss=62.358, backward_time=0.134, grad_norm=26.979, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.065, optim0_lr0=4.136e-04, train_time=3.096 +[gpuc01:0/16] 2024-01-24 02:21:42,934 (trainer:737) INFO: 6epoch:train:25401-25500batch: iter_time=1.425e-04, forward_time=0.115, loss_ctc=67.232, loss_att=62.070, acc=0.663, loss=63.618, backward_time=0.135, grad_norm=27.644, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.065, optim0_lr0=4.135e-04, train_time=3.111 +[gpuc01:0/16] 2024-01-24 02:24:17,443 (trainer:737) INFO: 6epoch:train:25501-25600batch: iter_time=1.432e-04, forward_time=0.115, loss_ctc=61.244, loss_att=61.649, acc=0.662, loss=61.527, backward_time=0.135, grad_norm=24.424, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.065, optim0_lr0=4.134e-04, train_time=3.090 +[gpuc01:0/16] 2024-01-24 02:26:52,136 (trainer:737) INFO: 6epoch:train:25601-25700batch: iter_time=1.456e-04, forward_time=0.114, loss_ctc=61.160, loss_att=62.341, acc=0.636, loss=61.987, backward_time=0.134, grad_norm=23.442, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.133e-04, train_time=3.094 +[gpuc01:0/16] 2024-01-24 02:29:28,398 (trainer:737) INFO: 6epoch:train:25701-25800batch: iter_time=1.417e-04, forward_time=0.114, loss_ctc=59.173, loss_att=58.436, acc=0.663, loss=58.657, backward_time=0.134, grad_norm=26.189, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.132e-04, train_time=3.125 +[gpuc01:0/16] 2024-01-24 02:32:04,778 (trainer:737) INFO: 6epoch:train:25801-25900batch: iter_time=1.510e-04, forward_time=0.115, loss_ctc=77.218, loss_att=64.276, acc=0.658, loss=68.159, backward_time=0.135, grad_norm=34.663, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.130e-04, train_time=3.127 +[gpuc01:0/16] 2024-01-24 02:34:39,527 (trainer:737) INFO: 6epoch:train:25901-26000batch: 
iter_time=1.468e-04, forward_time=0.114, loss_ctc=64.943, loss_att=58.116, acc=0.666, loss=60.164, backward_time=0.134, grad_norm=27.747, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.066, optim0_lr0=4.129e-04, train_time=3.095 +[gpuc01:0/16] 2024-01-24 02:37:15,813 (trainer:737) INFO: 6epoch:train:26001-26100batch: iter_time=1.388e-04, forward_time=0.115, loss_ctc=65.795, loss_att=59.074, acc=0.666, loss=61.090, backward_time=0.135, grad_norm=26.591, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.128e-04, train_time=3.125 +[gpuc01:0/16] 2024-01-24 02:39:52,206 (trainer:737) INFO: 6epoch:train:26101-26200batch: iter_time=1.459e-04, forward_time=0.114, loss_ctc=64.570, loss_att=58.821, acc=0.661, loss=60.546, backward_time=0.135, grad_norm=26.530, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.127e-04, train_time=3.128 +[gpuc01:0/16] 2024-01-24 02:42:28,810 (trainer:737) INFO: 6epoch:train:26201-26300batch: iter_time=1.577e-04, forward_time=0.114, loss_ctc=66.696, loss_att=60.263, acc=0.647, loss=62.193, backward_time=0.135, grad_norm=27.388, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.126e-04, train_time=3.132 +[gpuc01:0/16] 2024-01-24 02:45:05,964 (trainer:737) INFO: 6epoch:train:26301-26400batch: iter_time=1.635e-04, forward_time=0.114, loss_ctc=63.553, loss_att=61.698, acc=0.672, loss=62.255, backward_time=0.135, grad_norm=28.458, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.124e-04, train_time=3.143 +[gpuc01:0/16] 2024-01-24 02:47:46,599 (trainer:737) INFO: 6epoch:train:26401-26500batch: iter_time=1.523e-04, forward_time=0.113, loss_ctc=49.790, loss_att=45.864, acc=0.663, loss=47.041, backward_time=0.134, grad_norm=24.912, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.123e-04, train_time=3.212 +[gpuc01:0/16] 2024-01-24 02:50:27,045 (trainer:737) INFO: 6epoch:train:26501-26600batch: iter_time=1.576e-04, forward_time=0.119, 
loss_ctc=80.292, loss_att=68.202, acc=0.651, loss=71.829, backward_time=0.136, grad_norm=31.810, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.122e-04, train_time=3.209 +[gpuc01:0/16] 2024-01-24 02:53:03,054 (trainer:737) INFO: 6epoch:train:26601-26700batch: iter_time=1.502e-04, forward_time=0.114, loss_ctc=64.490, loss_att=71.497, acc=0.645, loss=69.395, backward_time=0.135, grad_norm=27.443, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.121e-04, train_time=3.120 +[gpuc01:0/16] 2024-01-24 02:55:39,126 (trainer:737) INFO: 6epoch:train:26701-26800batch: iter_time=1.593e-04, forward_time=0.115, loss_ctc=64.295, loss_att=62.138, acc=0.649, loss=62.785, backward_time=0.134, grad_norm=25.779, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.120e-04, train_time=3.121 +[gpuc01:0/16] 2024-01-24 02:58:15,278 (trainer:737) INFO: 6epoch:train:26801-26900batch: iter_time=1.470e-04, forward_time=0.116, loss_ctc=67.996, loss_att=71.486, acc=0.646, loss=70.439, backward_time=0.135, grad_norm=28.717, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.119e-04, train_time=3.123 +[gpuc01:0/16] 2024-01-24 03:00:50,308 (trainer:737) INFO: 6epoch:train:26901-27000batch: iter_time=1.494e-04, forward_time=0.115, loss_ctc=81.268, loss_att=74.051, acc=0.665, loss=76.216, backward_time=0.136, grad_norm=30.372, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.117e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 03:03:25,918 (trainer:737) INFO: 6epoch:train:27001-27100batch: iter_time=1.544e-04, forward_time=0.115, loss_ctc=59.550, loss_att=54.472, acc=0.677, loss=55.996, backward_time=0.134, grad_norm=23.646, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.116e-04, train_time=3.112 +[gpuc01:0/16] 2024-01-24 03:06:01,756 (trainer:737) INFO: 6epoch:train:27101-27200batch: iter_time=1.536e-04, forward_time=0.116, loss_ctc=67.319, loss_att=70.525, 
acc=0.638, loss=69.563, backward_time=0.136, grad_norm=28.593, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.115e-04, train_time=3.117 +[gpuc01:0/16] 2024-01-24 03:08:37,472 (trainer:737) INFO: 6epoch:train:27201-27300batch: iter_time=1.474e-04, forward_time=0.115, loss_ctc=62.476, loss_att=60.427, acc=0.659, loss=61.042, backward_time=0.135, grad_norm=25.748, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.114e-04, train_time=3.114 +[gpuc01:0/16] 2024-01-24 03:11:11,610 (trainer:737) INFO: 6epoch:train:27301-27400batch: iter_time=1.516e-04, forward_time=0.114, loss_ctc=59.349, loss_att=52.159, acc=0.667, loss=54.316, backward_time=0.134, grad_norm=23.893, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.113e-04, train_time=3.083 +[gpuc01:0/16] 2024-01-24 03:13:52,330 (trainer:737) INFO: 6epoch:train:27401-27500batch: iter_time=5.021e-04, forward_time=0.116, loss_ctc=68.750, loss_att=56.793, acc=0.668, loss=60.380, backward_time=0.134, grad_norm=32.537, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.112e-04, train_time=3.214 +[gpuc01:0/16] 2024-01-24 03:14:05,427 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpuc01:0/16] 2024-01-24 03:14:23,908 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 03:14:27,636 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 03:14:27,636 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpuc01:0/16] 2024-01-24 03:14:27,642 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 03:20:40,355 (trainer:737) INFO: 6epoch:train:27501-27600batch: iter_time=2.518, forward_time=0.115, loss_ctc=64.816, loss_att=63.361, acc=0.677, loss=63.797, backward_time=0.135, grad_norm=29.929, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.111e-04, train_time=8.160 +[gpuc01:0/16] 2024-01-24 03:23:15,142 (trainer:737) INFO: 6epoch:train:27601-27700batch: iter_time=1.483e-04, forward_time=0.115, loss_ctc=63.203, loss_att=65.798, acc=0.665, loss=65.019, backward_time=0.136, grad_norm=25.611, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.109e-04, train_time=3.096 +[gpuc01:0/16] 2024-01-24 03:25:50,162 (trainer:737) INFO: 6epoch:train:27701-27800batch: iter_time=1.535e-04, forward_time=0.115, loss_ctc=69.370, loss_att=75.157, acc=0.643, loss=73.421, backward_time=0.136, grad_norm=27.343, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.108e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 03:28:23,668 (trainer:737) INFO: 
6epoch:train:27801-27900batch: iter_time=1.526e-04, forward_time=0.114, loss_ctc=66.034, loss_att=66.428, acc=0.654, loss=66.310, backward_time=0.135, grad_norm=27.624, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.107e-04, train_time=3.070 +[gpuc01:0/16] 2024-01-24 03:30:58,733 (trainer:737) INFO: 6epoch:train:27901-28000batch: iter_time=1.595e-04, forward_time=0.115, loss_ctc=66.876, loss_att=62.213, acc=0.667, loss=63.612, backward_time=0.135, grad_norm=27.073, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.106e-04, train_time=3.101 +[gpuc01:0/16] 2024-01-24 03:33:34,147 (trainer:737) INFO: 6epoch:train:28001-28100batch: iter_time=1.562e-04, forward_time=0.115, loss_ctc=60.327, loss_att=64.780, acc=0.665, loss=63.444, backward_time=0.136, grad_norm=25.420, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.105e-04, train_time=3.108 +[gpuc01:0/16] 2024-01-24 03:36:08,970 (trainer:737) INFO: 6epoch:train:28101-28200batch: iter_time=1.538e-04, forward_time=0.114, loss_ctc=60.097, loss_att=61.213, acc=0.655, loss=60.878, backward_time=0.135, grad_norm=22.684, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.104e-04, train_time=3.096 +[gpuc01:0/16] 2024-01-24 03:38:44,488 (trainer:737) INFO: 6epoch:train:28201-28300batch: iter_time=1.395e-04, forward_time=0.118, loss_ctc=59.318, loss_att=58.796, acc=0.665, loss=58.952, backward_time=0.135, grad_norm=24.942, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.102e-04, train_time=3.110 +[gpuc01:0/16] 2024-01-24 03:41:18,992 (trainer:737) INFO: 6epoch:train:28301-28400batch: iter_time=1.390e-04, forward_time=0.115, loss_ctc=75.873, loss_att=65.373, acc=0.661, loss=68.523, backward_time=0.135, grad_norm=31.450, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.101e-04, train_time=3.090 +[gpuc01:0/16] 2024-01-24 03:43:53,511 (trainer:737) INFO: 6epoch:train:28401-28500batch: 
iter_time=1.486e-04, forward_time=0.114, loss_ctc=65.044, loss_att=58.744, acc=0.678, loss=60.634, backward_time=0.135, grad_norm=24.576, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.100e-04, train_time=3.090 +[gpuc01:0/16] 2024-01-24 03:46:28,539 (trainer:737) INFO: 6epoch:train:28501-28600batch: iter_time=1.432e-04, forward_time=0.115, loss_ctc=65.228, loss_att=59.062, acc=0.677, loss=60.911, backward_time=0.135, grad_norm=27.653, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.065, optim0_lr0=4.099e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 03:49:03,071 (trainer:737) INFO: 6epoch:train:28601-28700batch: iter_time=1.529e-04, forward_time=0.115, loss_ctc=64.507, loss_att=59.240, acc=0.674, loss=60.820, backward_time=0.135, grad_norm=27.762, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.098e-04, train_time=3.090 +[gpuc01:0/16] 2024-01-24 03:51:37,959 (trainer:737) INFO: 6epoch:train:28701-28800batch: iter_time=1.527e-04, forward_time=0.114, loss_ctc=65.716, loss_att=58.876, acc=0.665, loss=60.928, backward_time=0.135, grad_norm=27.210, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.097e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-24 03:54:12,869 (trainer:737) INFO: 6epoch:train:28801-28900batch: iter_time=1.446e-04, forward_time=0.114, loss_ctc=62.308, loss_att=60.868, acc=0.680, loss=61.300, backward_time=0.136, grad_norm=24.739, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.096e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-24 03:56:48,003 (trainer:737) INFO: 6epoch:train:28901-29000batch: iter_time=1.536e-04, forward_time=0.114, loss_ctc=49.070, loss_att=45.735, acc=0.670, loss=46.736, backward_time=0.134, grad_norm=21.079, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.094e-04, train_time=3.102 +[gpuc01:0/16] 2024-01-24 03:59:23,466 (trainer:737) INFO: 6epoch:train:29001-29100batch: iter_time=1.652e-04, forward_time=0.116, 
loss_ctc=80.262, loss_att=69.024, acc=0.659, loss=72.396, backward_time=0.136, grad_norm=32.730, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.093e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-24 04:01:58,559 (trainer:737) INFO: 6epoch:train:29101-29200batch: iter_time=1.539e-04, forward_time=0.116, loss_ctc=63.860, loss_att=69.779, acc=0.659, loss=68.003, backward_time=0.136, grad_norm=27.544, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.092e-04, train_time=3.102 +[gpuc01:0/16] 2024-01-24 04:04:33,215 (trainer:737) INFO: 6epoch:train:29201-29300batch: iter_time=1.549e-04, forward_time=0.115, loss_ctc=63.313, loss_att=61.076, acc=0.668, loss=61.747, backward_time=0.135, grad_norm=23.747, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.091e-04, train_time=3.093 +[gpuc01:0/16] 2024-01-24 04:07:07,930 (trainer:737) INFO: 6epoch:train:29301-29400batch: iter_time=1.496e-04, forward_time=0.116, loss_ctc=67.853, loss_att=71.335, acc=0.658, loss=70.291, backward_time=0.136, grad_norm=29.734, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.090e-04, train_time=3.094 +[gpuc01:0/16] 2024-01-24 04:09:43,005 (trainer:737) INFO: 6epoch:train:29401-29500batch: iter_time=1.472e-04, forward_time=0.116, loss_ctc=81.134, loss_att=77.662, acc=0.664, loss=78.704, backward_time=0.136, grad_norm=29.072, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.089e-04, train_time=3.101 +[gpuc01:0/16] 2024-01-24 04:12:18,526 (trainer:737) INFO: 6epoch:train:29501-29600batch: iter_time=1.443e-04, forward_time=0.117, loss_ctc=59.054, loss_att=54.377, acc=0.689, loss=55.780, backward_time=0.135, grad_norm=23.222, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.088e-04, train_time=3.110 +[gpuc01:0/16] 2024-01-24 04:14:54,996 (trainer:737) INFO: 6epoch:train:29601-29700batch: iter_time=1.500e-04, forward_time=0.116, loss_ctc=65.568, loss_att=70.464, 
acc=0.654, loss=68.995, backward_time=0.136, grad_norm=27.595, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.086e-04, train_time=3.129 +[gpuc01:0/16] 2024-01-24 04:17:30,680 (trainer:737) INFO: 6epoch:train:29701-29800batch: iter_time=1.468e-04, forward_time=0.115, loss_ctc=61.800, loss_att=59.293, acc=0.675, loss=60.045, backward_time=0.135, grad_norm=25.507, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.085e-04, train_time=3.113 +[gpuc01:0/16] 2024-01-24 04:20:07,093 (trainer:737) INFO: 6epoch:train:29801-29900batch: iter_time=1.518e-04, forward_time=0.115, loss_ctc=58.439, loss_att=51.834, acc=0.670, loss=53.816, backward_time=0.135, grad_norm=24.969, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.084e-04, train_time=3.128 +[gpuc01:0/16] 2024-01-24 04:22:43,514 (trainer:737) INFO: 6epoch:train:29901-30000batch: iter_time=1.572e-04, forward_time=0.116, loss_ctc=69.224, loss_att=57.503, acc=0.671, loss=61.019, backward_time=0.135, grad_norm=34.094, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.066, optim0_lr0=4.083e-04, train_time=3.128 +[gpuc01:0/16] 2024-01-24 05:01:08,781 (trainer:343) INFO: 6epoch results: [train] iter_time=0.080, forward_time=0.117, loss_ctc=68.760, loss_att=65.566, acc=0.648, loss=66.524, backward_time=0.135, grad_norm=27.664, clip=100.000, loss_scale=3.056e+17, optim_step_time=0.066, optim0_lr0=4.268e-04, train_time=3.231, time=13 hours, 27 minutes and 57.18 seconds, total_count=105000, gpu_max_cached_mem_GB=25.098, [valid] loss_ctc=60.976, cer_ctc=0.313, loss_att=55.410, acc=0.561, cer=0.389, wer=1.000, loss=57.080, time=38 minutes and 15.79 seconds, total_count=32697, gpu_max_cached_mem_GB=25.098 +[gpuc01:0/16] 2024-01-24 05:01:30,989 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpuc01:0/16] 2024-01-24 05:01:31,017 (trainer:445) INFO: The model files were removed: 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/1epoch.pth +[gpuc01:0/16] 2024-01-24 05:01:31,018 (trainer:272) INFO: 7/45epoch started. Estimated time to finish: 3 weeks, 1 day and 22 hours +[gpuc01:0/16] 2024-01-24 05:01:31,038 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpuc01:0/16] 2024-01-24 05:01:49,992 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 05:01:53,545 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 05:01:53,545 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpuc01:0/16] 2024-01-24 05:01:53,551 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 05:06:14,119 (trainer:737) INFO: 7epoch:train:1-100batch: iter_time=1.257, forward_time=0.119, loss_ctc=66.859, loss_att=70.218, acc=0.641, loss=69.210, backward_time=0.146, grad_norm=32.746, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.068, optim0_lr0=4.082e-04, train_time=5.661 +[gpuc01:0/16] 2024-01-24 05:08:49,305 (trainer:737) INFO: 7epoch:train:101-200batch: iter_time=1.408e-04, forward_time=0.117, loss_ctc=65.274, loss_att=62.674, acc=0.670, loss=63.454, backward_time=0.137, grad_norm=26.045, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.081e-04, train_time=3.104 +[gpuc01:0/16] 2024-01-24 05:11:24,492 (trainer:737) INFO: 
7epoch:train:201-300batch: iter_time=1.399e-04, forward_time=0.119, loss_ctc=65.223, loss_att=62.026, acc=0.645, loss=62.985, backward_time=0.136, grad_norm=25.975, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.080e-04, train_time=3.104 +[gpuc01:0/16] 2024-01-24 05:13:58,265 (trainer:737) INFO: 7epoch:train:301-400batch: iter_time=1.387e-04, forward_time=0.117, loss_ctc=71.611, loss_att=71.182, acc=0.663, loss=71.311, backward_time=0.137, grad_norm=30.300, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.078e-04, train_time=3.075 +[gpuc01:0/16] 2024-01-24 05:16:32,060 (trainer:737) INFO: 7epoch:train:401-500batch: iter_time=1.425e-04, forward_time=0.117, loss_ctc=80.914, loss_att=68.155, acc=0.667, loss=71.983, backward_time=0.136, grad_norm=30.921, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.077e-04, train_time=3.076 +[gpuc01:0/16] 2024-01-24 05:19:06,229 (trainer:737) INFO: 7epoch:train:501-600batch: iter_time=1.414e-04, forward_time=0.117, loss_ctc=62.229, loss_att=62.835, acc=0.677, loss=62.653, backward_time=0.136, grad_norm=23.407, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.076e-04, train_time=3.083 +[gpuc01:0/16] 2024-01-24 05:21:40,076 (trainer:737) INFO: 7epoch:train:601-700batch: iter_time=1.450e-04, forward_time=0.116, loss_ctc=61.912, loss_att=57.731, acc=0.635, loss=58.986, backward_time=0.135, grad_norm=26.029, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.075e-04, train_time=3.077 +[gpuc01:0/16] 2024-01-24 05:24:14,988 (trainer:737) INFO: 7epoch:train:701-800batch: iter_time=1.481e-04, forward_time=0.117, loss_ctc=70.174, loss_att=62.281, acc=0.654, loss=64.649, backward_time=0.136, grad_norm=27.461, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.074e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-24 05:26:49,446 (trainer:737) INFO: 7epoch:train:801-900batch: iter_time=1.372e-04, forward_time=0.117, 
loss_ctc=72.409, loss_att=66.252, acc=0.666, loss=68.099, backward_time=0.136, grad_norm=28.246, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.073e-04, train_time=3.089 +[gpuc01:0/16] 2024-01-24 05:29:24,456 (trainer:737) INFO: 7epoch:train:901-1000batch: iter_time=1.364e-04, forward_time=0.116, loss_ctc=58.005, loss_att=59.206, acc=0.661, loss=58.845, backward_time=0.135, grad_norm=23.707, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.072e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 05:31:58,648 (trainer:737) INFO: 7epoch:train:1001-1100batch: iter_time=1.427e-04, forward_time=0.116, loss_ctc=62.130, loss_att=66.269, acc=0.653, loss=65.027, backward_time=0.135, grad_norm=24.829, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.071e-04, train_time=3.084 +[gpuc01:0/16] 2024-01-24 05:34:35,098 (trainer:737) INFO: 7epoch:train:1101-1200batch: iter_time=1.325e-04, forward_time=0.116, loss_ctc=60.464, loss_att=60.647, acc=0.660, loss=60.592, backward_time=0.135, grad_norm=23.909, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.069e-04, train_time=3.129 +[gpuc01:0/16] 2024-01-24 05:37:07,867 (trainer:737) INFO: 7epoch:train:1201-1300batch: iter_time=1.337e-04, forward_time=0.116, loss_ctc=63.745, loss_att=58.330, acc=0.670, loss=59.955, backward_time=0.135, grad_norm=26.927, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.068e-04, train_time=3.055 +[gpuc01:0/16] 2024-01-24 05:39:41,437 (trainer:737) INFO: 7epoch:train:1301-1400batch: iter_time=1.374e-04, forward_time=0.116, loss_ctc=79.571, loss_att=74.544, acc=0.640, loss=76.052, backward_time=0.136, grad_norm=31.822, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.067e-04, train_time=3.071 +[gpuc01:0/16] 2024-01-24 05:42:14,744 (trainer:737) INFO: 7epoch:train:1401-1500batch: iter_time=1.418e-04, forward_time=0.116, loss_ctc=63.185, loss_att=72.439, acc=0.630, 
loss=69.663, backward_time=0.136, grad_norm=29.250, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.066e-04, train_time=3.066 +[gpuc01:0/16] 2024-01-24 05:44:50,072 (trainer:737) INFO: 7epoch:train:1501-1600batch: iter_time=1.342e-04, forward_time=0.115, loss_ctc=64.412, loss_att=55.972, acc=0.668, loss=58.504, backward_time=0.135, grad_norm=28.477, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.065e-04, train_time=3.106 +[gpuc01:0/16] 2024-01-24 05:47:25,335 (trainer:737) INFO: 7epoch:train:1601-1700batch: iter_time=1.423e-04, forward_time=0.116, loss_ctc=70.978, loss_att=77.430, acc=0.638, loss=75.495, backward_time=0.136, grad_norm=30.908, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.068, optim0_lr0=4.064e-04, train_time=3.105 +[gpuc01:0/16] 2024-01-24 05:50:00,282 (trainer:737) INFO: 7epoch:train:1701-1800batch: iter_time=1.566e-04, forward_time=0.117, loss_ctc=69.694, loss_att=67.331, acc=0.681, loss=68.040, backward_time=0.137, grad_norm=29.623, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.063e-04, train_time=3.099 +[gpuc01:0/16] 2024-01-24 05:52:36,151 (trainer:737) INFO: 7epoch:train:1801-1900batch: iter_time=1.582e-04, forward_time=0.117, loss_ctc=70.814, loss_att=67.575, acc=0.648, loss=68.547, backward_time=0.137, grad_norm=25.900, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.062e-04, train_time=3.117 +[gpuc01:0/16] 2024-01-24 05:55:11,746 (trainer:737) INFO: 7epoch:train:1901-2000batch: iter_time=1.607e-04, forward_time=0.116, loss_ctc=63.408, loss_att=65.672, acc=0.658, loss=64.993, backward_time=0.136, grad_norm=24.525, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.061e-04, train_time=3.112 +[gpuc01:0/16] 2024-01-24 05:57:46,634 (trainer:737) INFO: 7epoch:train:2001-2100batch: iter_time=1.545e-04, forward_time=0.119, loss_ctc=63.068, loss_att=65.213, acc=0.673, loss=64.570, backward_time=0.136, grad_norm=24.620, 
clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.059e-04, train_time=3.098 +[gpuc01:0/16] 2024-01-24 06:00:21,444 (trainer:737) INFO: 7epoch:train:2101-2200batch: iter_time=1.522e-04, forward_time=0.117, loss_ctc=63.779, loss_att=69.923, acc=0.640, loss=68.080, backward_time=0.136, grad_norm=25.762, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.058e-04, train_time=3.096 +[gpuc01:0/16] 2024-01-24 06:02:56,250 (trainer:737) INFO: 7epoch:train:2201-2300batch: iter_time=1.492e-04, forward_time=0.116, loss_ctc=72.625, loss_att=55.665, acc=0.672, loss=60.753, backward_time=0.135, grad_norm=31.539, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.057e-04, train_time=3.096 +[gpuc01:0/16] 2024-01-24 06:05:32,262 (trainer:737) INFO: 7epoch:train:2301-2400batch: iter_time=1.602e-04, forward_time=0.116, loss_ctc=70.676, loss_att=67.781, acc=0.645, loss=68.649, backward_time=0.136, grad_norm=30.339, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.056e-04, train_time=3.120 +[gpuc01:0/16] 2024-01-24 06:08:07,851 (trainer:737) INFO: 7epoch:train:2401-2500batch: iter_time=1.454e-04, forward_time=0.117, loss_ctc=73.286, loss_att=68.834, acc=0.646, loss=70.169, backward_time=0.136, grad_norm=30.131, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.055e-04, train_time=3.112 +[gpuc01:0/16] 2024-01-24 06:08:11,722 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpuc01:0/16] 2024-01-24 06:08:31,060 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 06:08:34,744 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 06:08:34,744 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpuc01:0/16] 2024-01-24 06:08:34,750 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 06:15:08,638 (trainer:737) INFO: 7epoch:train:2501-2600batch: iter_time=2.213, forward_time=0.117, loss_ctc=70.204, loss_att=70.471, acc=0.646, loss=70.391, backward_time=0.136, grad_norm=33.079, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.054e-04, train_time=8.416 +[gpuc01:0/16] 2024-01-24 06:17:44,374 (trainer:737) INFO: 7epoch:train:2601-2700batch: iter_time=1.420e-04, forward_time=0.116, loss_ctc=63.709, loss_att=61.687, acc=0.681, loss=62.294, backward_time=0.136, grad_norm=24.817, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.053e-04, train_time=3.115 +[gpuc01:0/16] 2024-01-24 06:20:19,543 (trainer:737) INFO: 7epoch:train:2701-2800batch: iter_time=1.434e-04, forward_time=0.119, loss_ctc=63.879, loss_att=60.667, acc=0.666, loss=61.630, backward_time=0.136, grad_norm=25.489, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.052e-04, train_time=3.103 +[gpuc01:0/16] 2024-01-24 06:22:54,924 (trainer:737) INFO: 
7epoch:train:2801-2900batch: iter_time=1.425e-04, forward_time=0.117, loss_ctc=71.520, loss_att=70.086, acc=0.673, loss=70.516, backward_time=0.137, grad_norm=28.783, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.051e-04, train_time=3.107 +[gpuc01:0/16] 2024-01-24 06:25:30,666 (trainer:737) INFO: 7epoch:train:2901-3000batch: iter_time=1.516e-04, forward_time=0.117, loss_ctc=80.856, loss_att=67.841, acc=0.672, loss=71.746, backward_time=0.136, grad_norm=30.734, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.049e-04, train_time=3.115 +[gpuc01:0/16] 2024-01-24 06:28:05,268 (trainer:737) INFO: 7epoch:train:3001-3100batch: iter_time=1.664e-04, forward_time=0.117, loss_ctc=61.360, loss_att=63.069, acc=0.678, loss=62.556, backward_time=0.136, grad_norm=24.238, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.048e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-24 06:30:40,359 (trainer:737) INFO: 7epoch:train:3101-3200batch: iter_time=1.620e-04, forward_time=0.116, loss_ctc=60.353, loss_att=56.416, acc=0.650, loss=57.597, backward_time=0.135, grad_norm=25.396, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.047e-04, train_time=3.102 +[gpuc01:0/16] 2024-01-24 06:33:16,232 (trainer:737) INFO: 7epoch:train:3201-3300batch: iter_time=1.554e-04, forward_time=0.117, loss_ctc=69.128, loss_att=63.963, acc=0.656, loss=65.513, backward_time=0.136, grad_norm=28.424, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.046e-04, train_time=3.117 +[gpuc01:0/16] 2024-01-24 06:35:51,689 (trainer:737) INFO: 7epoch:train:3301-3400batch: iter_time=1.580e-04, forward_time=0.117, loss_ctc=72.123, loss_att=65.656, acc=0.674, loss=67.596, backward_time=0.136, grad_norm=27.878, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.045e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-24 06:38:25,975 (trainer:737) INFO: 7epoch:train:3401-3500batch: iter_time=1.651e-04, 
forward_time=0.117, loss_ctc=56.657, loss_att=59.324, acc=0.669, loss=58.524, backward_time=0.136, grad_norm=21.756, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.044e-04, train_time=3.086 +[gpuc01:0/16] 2024-01-24 06:40:59,281 (trainer:737) INFO: 7epoch:train:3501-3600batch: iter_time=1.749e-04, forward_time=0.117, loss_ctc=60.442, loss_att=65.259, acc=0.659, loss=63.814, backward_time=0.137, grad_norm=23.213, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.043e-04, train_time=3.066 +[gpuc01:0/16] 2024-01-24 06:43:32,439 (trainer:737) INFO: 7epoch:train:3601-3700batch: iter_time=1.549e-04, forward_time=0.116, loss_ctc=59.150, loss_att=58.908, acc=0.673, loss=58.981, backward_time=0.136, grad_norm=23.519, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.042e-04, train_time=3.063 +[gpuc01:0/16] 2024-01-24 06:46:06,035 (trainer:737) INFO: 7epoch:train:3701-3800batch: iter_time=1.514e-04, forward_time=0.117, loss_ctc=62.233, loss_att=56.806, acc=0.687, loss=58.434, backward_time=0.136, grad_norm=25.090, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.041e-04, train_time=3.072 +[gpuc01:0/16] 2024-01-24 06:48:39,267 (trainer:737) INFO: 7epoch:train:3801-3900batch: iter_time=1.598e-04, forward_time=0.117, loss_ctc=78.488, loss_att=75.118, acc=0.649, loss=76.129, backward_time=0.137, grad_norm=31.391, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.067, optim0_lr0=4.039e-04, train_time=3.064 +[gpuc01:0/16] 2024-01-24 06:51:11,401 (trainer:737) INFO: 7epoch:train:3901-4000batch: iter_time=1.575e-04, forward_time=0.117, loss_ctc=61.627, loss_att=70.308, acc=0.653, loss=67.704, backward_time=0.136, grad_norm=26.086, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.066, optim0_lr0=4.038e-04, train_time=3.042 +[gpuc01:0/16] 2024-01-24 06:53:45,007 (trainer:737) INFO: 7epoch:train:4001-4100batch: iter_time=1.597e-04, forward_time=0.116, loss_ctc=63.630, loss_att=54.345, 
acc=0.683, loss=57.130, backward_time=0.136, grad_norm=28.165, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.037e-04, train_time=3.072 +[gpuc01:0/16] 2024-01-24 06:56:18,570 (trainer:737) INFO: 7epoch:train:4101-4200batch: iter_time=1.525e-04, forward_time=0.117, loss_ctc=70.332, loss_att=82.562, acc=0.642, loss=78.893, backward_time=0.137, grad_norm=29.150, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.036e-04, train_time=3.071 +[gpuc01:0/16] 2024-01-24 06:58:52,107 (trainer:737) INFO: 7epoch:train:4201-4300batch: iter_time=1.568e-04, forward_time=0.117, loss_ctc=68.965, loss_att=67.807, acc=0.686, loss=68.154, backward_time=0.136, grad_norm=27.087, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.035e-04, train_time=3.071 +[gpuc01:0/16] 2024-01-24 07:01:26,226 (trainer:737) INFO: 7epoch:train:4301-4400batch: iter_time=1.424e-04, forward_time=0.117, loss_ctc=69.025, loss_att=67.169, acc=0.660, loss=67.726, backward_time=0.136, grad_norm=24.927, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.034e-04, train_time=3.082 +[gpuc01:0/16] 2024-01-24 07:03:59,235 (trainer:737) INFO: 7epoch:train:4401-4500batch: iter_time=1.578e-04, forward_time=0.117, loss_ctc=62.761, loss_att=64.797, acc=0.676, loss=64.186, backward_time=0.136, grad_norm=23.821, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.033e-04, train_time=3.060 +[gpuc01:0/16] 2024-01-24 07:06:33,005 (trainer:737) INFO: 7epoch:train:4501-4600batch: iter_time=1.570e-04, forward_time=0.117, loss_ctc=61.265, loss_att=63.038, acc=0.680, loss=62.506, backward_time=0.136, grad_norm=25.773, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.032e-04, train_time=3.075 +[gpuc01:0/16] 2024-01-24 07:09:06,977 (trainer:737) INFO: 7epoch:train:4601-4700batch: iter_time=1.480e-04, forward_time=0.117, loss_ctc=62.021, loss_att=67.707, acc=0.660, loss=66.001, backward_time=0.136, 
grad_norm=24.837, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.031e-04, train_time=3.079 +[gpuc01:0/16] 2024-01-24 07:11:41,049 (trainer:737) INFO: 7epoch:train:4701-4800batch: iter_time=1.557e-04, forward_time=0.119, loss_ctc=70.365, loss_att=54.515, acc=0.687, loss=59.270, backward_time=0.136, grad_norm=29.686, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.030e-04, train_time=3.081 +[gpuc01:0/16] 2024-01-24 07:14:15,557 (trainer:737) INFO: 7epoch:train:4801-4900batch: iter_time=1.610e-04, forward_time=0.117, loss_ctc=68.620, loss_att=65.538, acc=0.655, loss=66.462, backward_time=0.136, grad_norm=28.907, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.029e-04, train_time=3.090 +[gpuc01:0/16] 2024-01-24 07:17:08,889 (trainer:737) INFO: 7epoch:train:4901-5000batch: iter_time=7.131e-04, forward_time=0.231, loss_ctc=69.909, loss_att=68.076, acc=0.666, loss=68.626, backward_time=0.158, grad_norm=28.536, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=4.027e-04, train_time=3.466 +[gpuc01:0/16] 2024-01-24 07:17:29,065 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpuc01:0/16] 2024-01-24 07:17:48,657 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 07:17:52,298 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 07:17:52,298 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpuc01:0/16] 2024-01-24 07:17:52,304 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 07:24:29,636 (trainer:737) INFO: 7epoch:train:5001-5100batch: iter_time=2.860, forward_time=0.117, loss_ctc=67.805, loss_att=68.484, acc=0.651, loss=68.280, backward_time=0.136, grad_norm=31.484, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.026e-04, train_time=8.815 +[gpuc01:0/16] 2024-01-24 07:27:04,745 (trainer:737) INFO: 7epoch:train:5101-5200batch: iter_time=1.497e-04, forward_time=0.116, loss_ctc=62.567, loss_att=60.723, acc=0.676, loss=61.276, backward_time=0.136, grad_norm=25.614, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.025e-04, train_time=3.102 +[gpuc01:0/16] 2024-01-24 07:29:39,254 (trainer:737) INFO: 7epoch:train:5201-5300batch: iter_time=1.609e-04, forward_time=0.116, loss_ctc=63.197, loss_att=59.798, acc=0.654, loss=60.818, backward_time=0.135, grad_norm=25.431, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.024e-04, train_time=3.090 +[gpuc01:0/16] 2024-01-24 07:32:13,414 (trainer:737) INFO: 
7epoch:train:5301-5400batch: iter_time=1.589e-04, forward_time=0.117, loss_ctc=70.380, loss_att=69.471, acc=0.668, loss=69.743, backward_time=0.136, grad_norm=28.482, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.023e-04, train_time=3.083 +[gpuc01:0/16] 2024-01-24 07:34:48,082 (trainer:737) INFO: 7epoch:train:5401-5500batch: iter_time=1.600e-04, forward_time=0.117, loss_ctc=77.935, loss_att=65.698, acc=0.677, loss=69.369, backward_time=0.136, grad_norm=28.230, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.022e-04, train_time=3.093 +[gpuc01:0/16] 2024-01-24 07:37:23,108 (trainer:737) INFO: 7epoch:train:5501-5600batch: iter_time=1.502e-04, forward_time=0.119, loss_ctc=60.521, loss_att=60.925, acc=0.685, loss=60.804, backward_time=0.135, grad_norm=23.596, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.021e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 07:39:57,290 (trainer:737) INFO: 7epoch:train:5601-5700batch: iter_time=1.510e-04, forward_time=0.115, loss_ctc=59.306, loss_att=55.867, acc=0.647, loss=56.899, backward_time=0.134, grad_norm=26.364, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.020e-04, train_time=3.083 +[gpuc01:0/16] 2024-01-24 07:42:33,029 (trainer:737) INFO: 7epoch:train:5701-5800batch: iter_time=1.509e-04, forward_time=0.116, loss_ctc=67.936, loss_att=60.234, acc=0.664, loss=62.545, backward_time=0.135, grad_norm=27.708, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.019e-04, train_time=3.115 +[gpuc01:0/16] 2024-01-24 07:45:08,233 (trainer:737) INFO: 7epoch:train:5801-5900batch: iter_time=1.571e-04, forward_time=0.117, loss_ctc=71.474, loss_att=65.705, acc=0.672, loss=67.436, backward_time=0.136, grad_norm=27.020, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.018e-04, train_time=3.104 +[gpuc01:0/16] 2024-01-24 07:47:43,223 (trainer:737) INFO: 7epoch:train:5901-6000batch: iter_time=1.630e-04, 
forward_time=0.116, loss_ctc=56.506, loss_att=57.492, acc=0.667, loss=57.196, backward_time=0.135, grad_norm=24.860, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.017e-04, train_time=3.100 +[gpuc01:0/16] 2024-01-24 07:50:18,656 (trainer:737) INFO: 7epoch:train:6001-6100batch: iter_time=1.711e-04, forward_time=0.116, loss_ctc=60.244, loss_att=64.293, acc=0.659, loss=63.078, backward_time=0.135, grad_norm=23.326, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.016e-04, train_time=3.108 +[gpuc01:0/16] 2024-01-24 07:52:53,272 (trainer:737) INFO: 7epoch:train:6101-6200batch: iter_time=1.588e-04, forward_time=0.116, loss_ctc=58.502, loss_att=58.587, acc=0.664, loss=58.562, backward_time=0.135, grad_norm=23.509, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.014e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-24 07:55:27,440 (trainer:737) INFO: 7epoch:train:6201-6300batch: iter_time=1.584e-04, forward_time=0.116, loss_ctc=61.123, loss_att=56.094, acc=0.679, loss=57.603, backward_time=0.135, grad_norm=25.456, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.068, optim0_lr0=4.013e-04, train_time=3.083 +[gpuc01:0/16] 2024-01-24 07:58:02,972 (trainer:737) INFO: 7epoch:train:6301-6400batch: iter_time=1.586e-04, forward_time=0.117, loss_ctc=77.684, loss_att=73.704, acc=0.644, loss=74.898, backward_time=0.136, grad_norm=31.967, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.012e-04, train_time=3.110 +[gpuc01:0/16] 2024-01-24 08:00:38,241 (trainer:737) INFO: 7epoch:train:6401-6500batch: iter_time=1.625e-04, forward_time=0.116, loss_ctc=61.444, loss_att=69.964, acc=0.639, loss=67.408, backward_time=0.135, grad_norm=27.964, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.011e-04, train_time=3.105 +[gpuc01:0/16] 2024-01-24 08:03:12,828 (trainer:737) INFO: 7epoch:train:6501-6600batch: iter_time=1.526e-04, forward_time=0.116, loss_ctc=63.575, loss_att=54.665, 
acc=0.674, loss=57.338, backward_time=0.135, grad_norm=29.346, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.010e-04, train_time=3.092 +[gpuc01:0/16] 2024-01-24 08:05:46,612 (trainer:737) INFO: 7epoch:train:6601-6700batch: iter_time=1.514e-04, forward_time=0.116, loss_ctc=68.588, loss_att=75.650, acc=0.644, loss=73.532, backward_time=0.135, grad_norm=29.704, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.009e-04, train_time=3.075 +[gpuc01:0/16] 2024-01-24 08:08:20,268 (trainer:737) INFO: 7epoch:train:6701-6800batch: iter_time=1.638e-04, forward_time=0.117, loss_ctc=67.712, loss_att=65.018, acc=0.687, loss=65.826, backward_time=0.137, grad_norm=26.736, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.068, optim0_lr0=4.008e-04, train_time=3.073 +[gpuc01:0/16] 2024-01-24 08:10:55,819 (trainer:737) INFO: 7epoch:train:6801-6900batch: iter_time=1.586e-04, forward_time=0.116, loss_ctc=68.245, loss_att=66.227, acc=0.655, loss=66.833, backward_time=0.136, grad_norm=25.952, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.007e-04, train_time=3.111 +[gpuc01:0/16] 2024-01-24 08:13:31,284 (trainer:737) INFO: 7epoch:train:6901-7000batch: iter_time=1.577e-04, forward_time=0.117, loss_ctc=61.462, loss_att=63.623, acc=0.663, loss=62.975, backward_time=0.136, grad_norm=25.644, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.006e-04, train_time=3.109 +[gpuc01:0/16] 2024-01-24 08:18:09,166 (trainer:737) INFO: 7epoch:train:7001-7100batch: iter_time=0.184, forward_time=0.116, loss_ctc=60.095, loss_att=63.880, acc=0.676, loss=62.745, backward_time=0.135, grad_norm=26.611, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.005e-04, train_time=5.557 +[gpuc01:0/16] 2024-01-24 08:20:11,280 (trainer:737) INFO: 7epoch:train:7101-7200batch: iter_time=1.428e-04, forward_time=0.116, loss_ctc=61.806, loss_att=68.226, acc=0.648, loss=66.300, backward_time=0.137, 
grad_norm=26.573, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.068, optim0_lr0=4.004e-04, train_time=2.442 +[gpuc01:0/16] 2024-01-24 08:22:13,773 (trainer:737) INFO: 7epoch:train:7201-7300batch: iter_time=1.417e-04, forward_time=0.116, loss_ctc=70.260, loss_att=54.320, acc=0.678, loss=59.102, backward_time=0.137, grad_norm=32.850, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.003e-04, train_time=2.450 +[gpuc01:0/16] 2024-01-24 08:24:16,121 (trainer:737) INFO: 7epoch:train:7301-7400batch: iter_time=1.530e-04, forward_time=0.116, loss_ctc=67.488, loss_att=65.635, acc=0.649, loss=66.191, backward_time=0.137, grad_norm=31.229, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.002e-04, train_time=2.447 +[gpuc01:0/16] 2024-01-24 08:26:24,545 (trainer:737) INFO: 7epoch:train:7401-7500batch: iter_time=1.378e-04, forward_time=0.117, loss_ctc=68.720, loss_att=65.960, acc=0.654, loss=66.788, backward_time=0.137, grad_norm=28.983, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=4.001e-04, train_time=2.568 +[gpuc01:0/16] 2024-01-24 08:26:28,465 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpuc01:0/16] 2024-01-24 08:26:47,581 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 08:26:51,627 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 08:26:51,627 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpuc01:0/16] 2024-01-24 08:26:51,634 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 08:33:26,899 (trainer:737) INFO: 7epoch:train:7501-7600batch: iter_time=2.272, forward_time=0.125, loss_ctc=67.788, loss_att=65.837, acc=0.653, loss=66.422, backward_time=0.139, grad_norm=31.914, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.068, optim0_lr0=3.999e-04, train_time=8.447 +[gpuc01:0/16] 2024-01-24 08:35:33,368 (trainer:737) INFO: 7epoch:train:7601-7700batch: iter_time=1.639e-04, forward_time=0.116, loss_ctc=61.428, loss_att=59.442, acc=0.678, loss=60.038, backward_time=0.138, grad_norm=25.801, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.068, optim0_lr0=3.998e-04, train_time=2.529 +[gpuc01:0/16] 2024-01-24 08:37:39,527 (trainer:737) INFO: 7epoch:train:7701-7800batch: iter_time=1.649e-04, forward_time=0.116, loss_ctc=63.338, loss_att=58.405, acc=0.659, loss=59.885, backward_time=0.138, grad_norm=24.175, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=3.997e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 08:39:44,321 (trainer:737) INFO: 
7epoch:train:7801-7900batch: iter_time=1.408e-04, forward_time=0.117, loss_ctc=70.166, loss_att=67.972, acc=0.670, loss=68.630, backward_time=0.139, grad_norm=28.231, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.068, optim0_lr0=3.996e-04, train_time=2.496 +[gpuc01:0/16] 2024-01-24 08:41:49,886 (trainer:737) INFO: 7epoch:train:7901-8000batch: iter_time=1.366e-04, forward_time=0.117, loss_ctc=76.838, loss_att=64.865, acc=0.678, loss=68.457, backward_time=0.138, grad_norm=29.901, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.067, optim0_lr0=3.995e-04, train_time=2.511 +[gpuc01:0/16] 2024-01-24 08:43:56,055 (trainer:737) INFO: 7epoch:train:8001-8100batch: iter_time=1.454e-04, forward_time=0.116, loss_ctc=59.828, loss_att=59.608, acc=0.686, loss=59.674, backward_time=0.138, grad_norm=24.426, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.068, optim0_lr0=3.994e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 08:46:02,353 (trainer:737) INFO: 7epoch:train:8101-8200batch: iter_time=1.525e-04, forward_time=0.116, loss_ctc=59.309, loss_att=54.184, acc=0.652, loss=55.722, backward_time=0.137, grad_norm=24.208, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.068, optim0_lr0=3.993e-04, train_time=2.526 +[gpuc01:0/16] 2024-01-24 08:48:08,371 (trainer:737) INFO: 7epoch:train:8201-8300batch: iter_time=1.774e-04, forward_time=0.117, loss_ctc=68.014, loss_att=59.403, acc=0.664, loss=61.986, backward_time=0.138, grad_norm=27.729, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.992e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 08:50:13,951 (trainer:737) INFO: 7epoch:train:8301-8400batch: iter_time=1.589e-04, forward_time=0.117, loss_ctc=71.012, loss_att=64.932, acc=0.673, loss=66.756, backward_time=0.138, grad_norm=28.703, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.991e-04, train_time=2.511 +[gpuc01:0/16] 2024-01-24 08:52:19,690 (trainer:737) INFO: 7epoch:train:8401-8500batch: iter_time=1.606e-04, 
forward_time=0.116, loss_ctc=55.032, loss_att=56.420, acc=0.668, loss=56.004, backward_time=0.137, grad_norm=22.534, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.990e-04, train_time=2.515 +[gpuc01:0/16] 2024-01-24 08:54:25,398 (trainer:737) INFO: 7epoch:train:8501-8600batch: iter_time=1.599e-04, forward_time=0.116, loss_ctc=59.076, loss_att=62.961, acc=0.664, loss=61.796, backward_time=0.137, grad_norm=23.731, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.989e-04, train_time=2.514 +[gpuc01:0/16] 2024-01-24 08:56:31,460 (trainer:737) INFO: 7epoch:train:8601-8700batch: iter_time=1.426e-04, forward_time=0.118, loss_ctc=58.343, loss_att=57.096, acc=0.672, loss=57.470, backward_time=0.136, grad_norm=24.074, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.988e-04, train_time=2.521 +[gpuc01:0/16] 2024-01-24 08:58:37,473 (trainer:737) INFO: 7epoch:train:8701-8800batch: iter_time=1.396e-04, forward_time=0.116, loss_ctc=59.731, loss_att=54.962, acc=0.683, loss=56.393, backward_time=0.137, grad_norm=25.209, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.987e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 09:00:43,409 (trainer:737) INFO: 7epoch:train:8801-8900batch: iter_time=1.500e-04, forward_time=0.116, loss_ctc=76.773, loss_att=73.549, acc=0.643, loss=74.516, backward_time=0.138, grad_norm=33.732, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.986e-04, train_time=2.518 +[gpuc01:0/16] 2024-01-24 09:02:48,916 (trainer:737) INFO: 7epoch:train:8901-9000batch: iter_time=1.400e-04, forward_time=0.116, loss_ctc=60.917, loss_att=70.105, acc=0.640, loss=67.349, backward_time=0.137, grad_norm=29.307, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.985e-04, train_time=2.510 +[gpuc01:0/16] 2024-01-24 09:04:55,160 (trainer:737) INFO: 7epoch:train:9001-9100batch: iter_time=1.429e-04, forward_time=0.115, loss_ctc=62.184, loss_att=55.456, 
acc=0.670, loss=57.474, backward_time=0.136, grad_norm=29.076, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.984e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 09:07:02,114 (trainer:737) INFO: 7epoch:train:9101-9200batch: iter_time=1.405e-04, forward_time=0.115, loss_ctc=67.817, loss_att=73.270, acc=0.652, loss=71.634, backward_time=0.137, grad_norm=31.837, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.982e-04, train_time=2.539 +[gpuc01:0/16] 2024-01-24 09:09:08,246 (trainer:737) INFO: 7epoch:train:9201-9300batch: iter_time=1.439e-04, forward_time=0.117, loss_ctc=66.513, loss_att=63.588, acc=0.688, loss=64.466, backward_time=0.138, grad_norm=27.322, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.981e-04, train_time=2.522 +[gpuc01:0/16] 2024-01-24 09:11:14,482 (trainer:737) INFO: 7epoch:train:9301-9400batch: iter_time=1.512e-04, forward_time=0.115, loss_ctc=67.880, loss_att=64.619, acc=0.660, loss=65.597, backward_time=0.137, grad_norm=26.365, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.980e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 09:13:21,117 (trainer:737) INFO: 7epoch:train:9401-9500batch: iter_time=1.445e-04, forward_time=0.115, loss_ctc=60.517, loss_att=63.038, acc=0.666, loss=62.282, backward_time=0.137, grad_norm=23.558, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.068, optim0_lr0=3.979e-04, train_time=2.532 +[gpuc01:0/16] 2024-01-24 09:15:27,204 (trainer:737) INFO: 7epoch:train:9501-9600batch: iter_time=1.705e-04, forward_time=0.116, loss_ctc=58.713, loss_att=63.396, acc=0.681, loss=61.991, backward_time=0.138, grad_norm=24.687, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.978e-04, train_time=2.522 +[gpuc01:0/16] 2024-01-24 09:17:33,541 (trainer:737) INFO: 7epoch:train:9601-9700batch: iter_time=1.460e-04, forward_time=0.118, loss_ctc=60.847, loss_att=65.947, acc=0.656, loss=64.417, backward_time=0.137, 
grad_norm=25.363, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.977e-04, train_time=2.527 +[gpuc01:0/16] 2024-01-24 09:19:40,251 (trainer:737) INFO: 7epoch:train:9701-9800batch: iter_time=1.344e-04, forward_time=0.116, loss_ctc=69.174, loss_att=53.426, acc=0.681, loss=58.151, backward_time=0.137, grad_norm=29.721, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.976e-04, train_time=2.534 +[gpuc01:0/16] 2024-01-24 09:21:46,302 (trainer:737) INFO: 7epoch:train:9801-9900batch: iter_time=1.568e-04, forward_time=0.117, loss_ctc=66.594, loss_att=64.738, acc=0.652, loss=65.295, backward_time=0.137, grad_norm=32.321, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.066, optim0_lr0=3.975e-04, train_time=2.521 +[gpuc01:0/16] 2024-01-24 09:23:52,329 (trainer:737) INFO: 7epoch:train:9901-10000batch: iter_time=2.552e-04, forward_time=0.117, loss_ctc=69.118, loss_att=65.887, acc=0.658, loss=66.856, backward_time=0.137, grad_norm=29.993, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.974e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 09:23:54,754 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpuc01:0/16] 2024-01-24 09:24:13,775 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 09:24:17,328 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 09:24:17,329 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpuc01:0/16] 2024-01-24 09:24:17,335 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 09:28:20,168 (trainer:737) INFO: 7epoch:train:10001-10100batch: iter_time=1.375, forward_time=0.117, loss_ctc=66.606, loss_att=65.640, acc=0.653, loss=65.930, backward_time=0.137, grad_norm=29.259, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.973e-04, train_time=5.357 +[gpuc01:0/16] 2024-01-24 09:30:23,162 (trainer:737) INFO: 7epoch:train:10101-10200batch: iter_time=1.389e-04, forward_time=0.118, loss_ctc=60.903, loss_att=58.176, acc=0.684, loss=58.994, backward_time=0.137, grad_norm=26.093, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.972e-04, train_time=2.460 +[gpuc01:0/16] 2024-01-24 09:32:25,934 (trainer:737) INFO: 7epoch:train:10201-10300batch: iter_time=1.289e-04, forward_time=0.118, loss_ctc=62.766, loss_att=56.995, acc=0.663, loss=58.726, backward_time=0.137, grad_norm=25.103, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.971e-04, train_time=2.455 +[gpuc01:0/16] 2024-01-24 09:34:29,208 (trainer:737) INFO: 
7epoch:train:10301-10400batch: iter_time=1.422e-04, forward_time=0.118, loss_ctc=69.040, loss_att=68.335, acc=0.671, loss=68.546, backward_time=0.138, grad_norm=27.570, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.970e-04, train_time=2.465 +[gpuc01:0/16] 2024-01-24 09:36:31,763 (trainer:737) INFO: 7epoch:train:10401-10500batch: iter_time=1.682e-04, forward_time=0.118, loss_ctc=76.317, loss_att=64.919, acc=0.679, loss=68.338, backward_time=0.138, grad_norm=29.495, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.068, optim0_lr0=3.969e-04, train_time=2.451 +[gpuc01:0/16] 2024-01-24 09:38:34,262 (trainer:737) INFO: 7epoch:train:10501-10600batch: iter_time=1.658e-04, forward_time=0.116, loss_ctc=59.463, loss_att=59.521, acc=0.687, loss=59.503, backward_time=0.137, grad_norm=25.978, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.068, optim0_lr0=3.968e-04, train_time=2.450 +[gpuc01:0/16] 2024-01-24 09:40:37,782 (trainer:737) INFO: 7epoch:train:10601-10700batch: iter_time=1.429e-04, forward_time=0.116, loss_ctc=59.138, loss_att=53.639, acc=0.654, loss=55.289, backward_time=0.136, grad_norm=24.849, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.068, optim0_lr0=3.967e-04, train_time=2.470 +[gpuc01:0/16] 2024-01-24 09:42:41,288 (trainer:737) INFO: 7epoch:train:10701-10800batch: iter_time=1.553e-04, forward_time=0.117, loss_ctc=66.919, loss_att=59.181, acc=0.667, loss=61.502, backward_time=0.137, grad_norm=28.170, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.966e-04, train_time=2.470 +[gpuc01:0/16] 2024-01-24 09:44:44,573 (trainer:737) INFO: 7epoch:train:10801-10900batch: iter_time=1.546e-04, forward_time=0.117, loss_ctc=70.346, loss_att=64.065, acc=0.677, loss=65.950, backward_time=0.138, grad_norm=28.163, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.965e-04, train_time=2.465 +[gpuc01:0/16] 2024-01-24 09:46:47,977 (trainer:737) INFO: 7epoch:train:10901-11000batch: 
iter_time=1.371e-04, forward_time=0.116, loss_ctc=55.102, loss_att=56.364, acc=0.669, loss=55.986, backward_time=0.138, grad_norm=22.380, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.964e-04, train_time=2.468 +[gpuc01:0/16] 2024-01-24 09:48:51,889 (trainer:737) INFO: 7epoch:train:11001-11100batch: iter_time=1.534e-04, forward_time=0.117, loss_ctc=58.799, loss_att=62.042, acc=0.665, loss=61.069, backward_time=0.138, grad_norm=24.498, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.963e-04, train_time=2.478 +[gpuc01:0/16] 2024-01-24 09:50:56,372 (trainer:737) INFO: 7epoch:train:11101-11200batch: iter_time=1.608e-04, forward_time=0.116, loss_ctc=58.117, loss_att=56.605, acc=0.672, loss=57.059, backward_time=0.138, grad_norm=23.242, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.962e-04, train_time=2.489 +[gpuc01:0/16] 2024-01-24 09:53:00,785 (trainer:737) INFO: 7epoch:train:11201-11300batch: iter_time=1.478e-04, forward_time=0.117, loss_ctc=59.887, loss_att=54.135, acc=0.686, loss=55.860, backward_time=0.137, grad_norm=25.127, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.961e-04, train_time=2.488 +[gpuc01:0/16] 2024-01-24 09:55:04,592 (trainer:737) INFO: 7epoch:train:11301-11400batch: iter_time=1.582e-04, forward_time=0.117, loss_ctc=76.232, loss_att=72.508, acc=0.646, loss=73.625, backward_time=0.138, grad_norm=29.735, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.960e-04, train_time=2.476 +[gpuc01:0/16] 2024-01-24 09:57:09,052 (trainer:737) INFO: 7epoch:train:11401-11500batch: iter_time=1.811e-04, forward_time=0.117, loss_ctc=60.477, loss_att=68.867, acc=0.644, loss=66.350, backward_time=0.138, grad_norm=25.836, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.958e-04, train_time=2.489 +[gpuc01:0/16] 2024-01-24 09:59:13,729 (trainer:737) INFO: 7epoch:train:11501-11600batch: iter_time=1.735e-04, forward_time=0.116, 
loss_ctc=61.908, loss_att=55.107, acc=0.670, loss=57.147, backward_time=0.138, grad_norm=29.349, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.068, optim0_lr0=3.957e-04, train_time=2.493 +[gpuc01:0/16] 2024-01-24 10:01:17,777 (trainer:737) INFO: 7epoch:train:11601-11700batch: iter_time=1.519e-04, forward_time=0.116, loss_ctc=67.103, loss_att=72.441, acc=0.655, loss=70.840, backward_time=0.139, grad_norm=28.487, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.956e-04, train_time=2.481 +[gpuc01:0/16] 2024-01-24 10:03:22,220 (trainer:737) INFO: 7epoch:train:11701-11800batch: iter_time=1.459e-04, forward_time=0.118, loss_ctc=66.584, loss_att=63.341, acc=0.693, loss=64.314, backward_time=0.139, grad_norm=26.305, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.955e-04, train_time=2.489 +[gpuc01:0/16] 2024-01-24 10:05:26,505 (trainer:737) INFO: 7epoch:train:11801-11900batch: iter_time=1.419e-04, forward_time=0.117, loss_ctc=68.172, loss_att=65.414, acc=0.660, loss=66.242, backward_time=0.138, grad_norm=25.568, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.954e-04, train_time=2.485 +[gpuc01:0/16] 2024-01-24 10:07:30,558 (trainer:737) INFO: 7epoch:train:11901-12000batch: iter_time=1.413e-04, forward_time=0.117, loss_ctc=61.264, loss_att=62.564, acc=0.668, loss=62.174, backward_time=0.138, grad_norm=25.017, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.067, optim0_lr0=3.953e-04, train_time=2.481 +[gpuc01:0/16] 2024-01-24 10:09:34,589 (trainer:737) INFO: 7epoch:train:12001-12100batch: iter_time=1.455e-04, forward_time=0.116, loss_ctc=58.746, loss_att=62.958, acc=0.681, loss=61.695, backward_time=0.137, grad_norm=25.761, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.952e-04, train_time=2.480 +[gpuc01:0/16] 2024-01-24 10:11:38,824 (trainer:737) INFO: 7epoch:train:12101-12200batch: iter_time=1.481e-04, forward_time=0.117, loss_ctc=59.950, loss_att=65.019, 
acc=0.659, loss=63.499, backward_time=0.137, grad_norm=25.001, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.951e-04, train_time=2.484 +[gpuc01:0/16] 2024-01-24 10:13:43,005 (trainer:737) INFO: 7epoch:train:12201-12300batch: iter_time=1.374e-04, forward_time=0.116, loss_ctc=68.617, loss_att=52.340, acc=0.686, loss=57.223, backward_time=0.137, grad_norm=29.531, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.950e-04, train_time=2.483 +[gpuc01:0/16] 2024-01-24 10:15:47,645 (trainer:737) INFO: 7epoch:train:12301-12400batch: iter_time=1.461e-04, forward_time=0.119, loss_ctc=65.284, loss_att=62.561, acc=0.658, loss=63.378, backward_time=0.138, grad_norm=30.815, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.949e-04, train_time=2.493 +[gpuc01:0/16] 2024-01-24 10:17:51,845 (trainer:737) INFO: 7epoch:train:12401-12500batch: iter_time=1.411e-04, forward_time=0.116, loss_ctc=67.499, loss_att=65.554, acc=0.661, loss=66.137, backward_time=0.138, grad_norm=28.868, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.948e-04, train_time=2.484 +[gpuc01:0/16] 2024-01-24 10:17:54,526 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpuc01:0/16] 2024-01-24 10:18:14,147 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 10:18:17,703 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 10:18:17,703 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpuc01:0/16] 2024-01-24 10:18:17,709 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 10:22:17,909 (trainer:737) INFO: 7epoch:train:12501-12600batch: iter_time=1.388, forward_time=0.116, loss_ctc=65.548, loss_att=64.640, acc=0.658, loss=64.913, backward_time=0.138, grad_norm=28.598, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.068, optim0_lr0=3.947e-04, train_time=5.321 +[gpuc01:0/16] 2024-01-24 10:24:23,250 (trainer:737) INFO: 7epoch:train:12601-12700batch: iter_time=1.541e-04, forward_time=0.116, loss_ctc=61.195, loss_att=57.827, acc=0.683, loss=58.837, backward_time=0.138, grad_norm=23.610, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.068, optim0_lr0=3.946e-04, train_time=2.507 +[gpuc01:0/16] 2024-01-24 10:26:28,194 (trainer:737) INFO: 7epoch:train:12701-12800batch: iter_time=1.615e-04, forward_time=0.118, loss_ctc=62.222, loss_att=57.018, acc=0.661, loss=58.579, backward_time=0.137, grad_norm=24.219, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.945e-04, train_time=2.499 +[gpuc01:0/16] 2024-01-24 10:28:33,195 (trainer:737) INFO: 
7epoch:train:12801-12900batch: iter_time=1.583e-04, forward_time=0.117, loss_ctc=68.365, loss_att=67.320, acc=0.675, loss=67.634, backward_time=0.139, grad_norm=26.820, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.944e-04, train_time=2.500 +[gpuc01:0/16] 2024-01-24 10:30:38,543 (trainer:737) INFO: 7epoch:train:12901-13000batch: iter_time=1.766e-04, forward_time=0.117, loss_ctc=76.469, loss_att=65.984, acc=0.677, loss=69.129, backward_time=0.138, grad_norm=29.433, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.943e-04, train_time=2.507 +[gpuc01:0/16] 2024-01-24 10:32:43,460 (trainer:737) INFO: 7epoch:train:13001-13100batch: iter_time=1.587e-04, forward_time=0.117, loss_ctc=59.029, loss_att=58.637, acc=0.691, loss=58.755, backward_time=0.137, grad_norm=23.161, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.942e-04, train_time=2.498 +[gpuc01:0/16] 2024-01-24 10:34:48,438 (trainer:737) INFO: 7epoch:train:13101-13200batch: iter_time=1.536e-04, forward_time=0.116, loss_ctc=58.501, loss_att=53.837, acc=0.656, loss=55.236, backward_time=0.136, grad_norm=25.368, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.941e-04, train_time=2.499 +[gpuc01:0/16] 2024-01-24 10:36:53,449 (trainer:737) INFO: 7epoch:train:13201-13300batch: iter_time=1.569e-04, forward_time=0.116, loss_ctc=66.433, loss_att=57.984, acc=0.672, loss=60.518, backward_time=0.137, grad_norm=27.877, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.940e-04, train_time=2.500 +[gpuc01:0/16] 2024-01-24 10:38:58,551 (trainer:737) INFO: 7epoch:train:13301-13400batch: iter_time=1.521e-04, forward_time=0.117, loss_ctc=70.265, loss_att=63.280, acc=0.678, loss=65.376, backward_time=0.137, grad_norm=28.178, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.939e-04, train_time=2.502 +[gpuc01:0/16] 2024-01-24 10:41:03,858 (trainer:737) INFO: 7epoch:train:13401-13500batch: 
iter_time=1.551e-04, forward_time=0.117, loss_ctc=54.476, loss_att=54.938, acc=0.678, loss=54.799, backward_time=0.137, grad_norm=22.587, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.938e-04, train_time=2.506 +[gpuc01:0/16] 2024-01-24 10:43:09,647 (trainer:737) INFO: 7epoch:train:13501-13600batch: iter_time=1.512e-04, forward_time=0.116, loss_ctc=58.779, loss_att=62.191, acc=0.666, loss=61.168, backward_time=0.137, grad_norm=24.647, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.937e-04, train_time=2.516 +[gpuc01:0/16] 2024-01-24 10:45:14,629 (trainer:737) INFO: 7epoch:train:13601-13700batch: iter_time=1.550e-04, forward_time=0.120, loss_ctc=56.958, loss_att=56.482, acc=0.673, loss=56.625, backward_time=0.136, grad_norm=24.970, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.936e-04, train_time=2.499 +[gpuc01:0/16] 2024-01-24 10:47:19,317 (trainer:737) INFO: 7epoch:train:13701-13800batch: iter_time=1.419e-04, forward_time=0.117, loss_ctc=59.544, loss_att=54.172, acc=0.688, loss=55.784, backward_time=0.137, grad_norm=24.633, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.935e-04, train_time=2.494 +[gpuc01:0/16] 2024-01-24 10:49:24,234 (trainer:737) INFO: 7epoch:train:13801-13900batch: iter_time=1.312e-04, forward_time=0.118, loss_ctc=77.043, loss_att=72.220, acc=0.651, loss=73.667, backward_time=0.137, grad_norm=32.307, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.934e-04, train_time=2.498 +[gpuc01:0/16] 2024-01-24 10:51:29,256 (trainer:737) INFO: 7epoch:train:13901-14000batch: iter_time=1.453e-04, forward_time=0.118, loss_ctc=59.507, loss_att=67.330, acc=0.647, loss=64.983, backward_time=0.137, grad_norm=28.715, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.933e-04, train_time=2.500 +[gpuc01:0/16] 2024-01-24 10:53:34,231 (trainer:737) INFO: 7epoch:train:14001-14100batch: iter_time=1.446e-04, forward_time=0.118, 
loss_ctc=60.329, loss_att=52.377, acc=0.681, loss=54.762, backward_time=0.136, grad_norm=27.185, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.932e-04, train_time=2.499 +[gpuc01:0/16] 2024-01-24 10:55:39,443 (trainer:737) INFO: 7epoch:train:14101-14200batch: iter_time=1.355e-04, forward_time=0.119, loss_ctc=68.370, loss_att=74.027, acc=0.651, loss=72.330, backward_time=0.138, grad_norm=32.023, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.066, optim0_lr0=3.931e-04, train_time=2.504 +[gpuc01:0/16] 2024-01-24 10:57:44,780 (trainer:737) INFO: 7epoch:train:14201-14300batch: iter_time=1.387e-04, forward_time=0.119, loss_ctc=65.869, loss_att=62.829, acc=0.694, loss=63.741, backward_time=0.138, grad_norm=26.681, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.930e-04, train_time=2.507 +[gpuc01:0/16] 2024-01-24 10:59:51,194 (trainer:737) INFO: 7epoch:train:14301-14400batch: iter_time=1.421e-04, forward_time=0.117, loss_ctc=66.610, loss_att=64.811, acc=0.663, loss=65.351, backward_time=0.137, grad_norm=25.093, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.929e-04, train_time=2.528 +[gpuc01:0/16] 2024-01-24 11:01:57,483 (trainer:737) INFO: 7epoch:train:14401-14500batch: iter_time=1.447e-04, forward_time=0.117, loss_ctc=61.260, loss_att=62.137, acc=0.670, loss=61.874, backward_time=0.137, grad_norm=24.396, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.928e-04, train_time=2.526 +[gpuc01:0/16] 2024-01-24 11:04:03,551 (trainer:737) INFO: 7epoch:train:14501-14600batch: iter_time=1.373e-04, forward_time=0.116, loss_ctc=57.711, loss_att=61.284, acc=0.687, loss=60.212, backward_time=0.137, grad_norm=28.591, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.066, optim0_lr0=3.927e-04, train_time=2.521 +[gpuc01:0/16] 2024-01-24 11:06:09,773 (trainer:737) INFO: 7epoch:train:14601-14700batch: iter_time=1.494e-04, forward_time=0.117, loss_ctc=59.619, loss_att=65.165, 
acc=0.655, loss=63.501, backward_time=0.137, grad_norm=25.629, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.926e-04, train_time=2.524 +[gpuc01:0/16] 2024-01-24 11:08:16,036 (trainer:737) INFO: 7epoch:train:14701-14800batch: iter_time=1.412e-04, forward_time=0.116, loss_ctc=68.167, loss_att=52.670, acc=0.684, loss=57.319, backward_time=0.137, grad_norm=31.521, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.925e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 11:10:22,132 (trainer:737) INFO: 7epoch:train:14801-14900batch: iter_time=1.488e-04, forward_time=0.116, loss_ctc=65.326, loss_att=63.096, acc=0.659, loss=63.765, backward_time=0.137, grad_norm=31.320, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.924e-04, train_time=2.522 +[gpuc01:0/16] 2024-01-24 11:12:28,370 (trainer:737) INFO: 7epoch:train:14901-15000batch: iter_time=1.435e-04, forward_time=0.117, loss_ctc=65.800, loss_att=64.416, acc=0.661, loss=64.831, backward_time=0.137, grad_norm=30.467, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.923e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 11:12:31,082 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpuc01:0/16] 2024-01-24 11:12:50,152 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 11:12:53,779 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 11:12:53,780 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpuc01:0/16] 2024-01-24 11:12:53,786 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 11:16:56,226 (trainer:737) INFO: 7epoch:train:15001-15100batch: iter_time=1.390, forward_time=0.121, loss_ctc=64.804, loss_att=67.808, acc=0.659, loss=66.907, backward_time=0.137, grad_norm=31.395, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.922e-04, train_time=5.357 +[gpuc01:0/16] 2024-01-24 11:19:03,123 (trainer:737) INFO: 7epoch:train:15101-15200batch: iter_time=1.858e-04, forward_time=0.117, loss_ctc=61.259, loss_att=59.975, acc=0.689, loss=60.360, backward_time=0.137, grad_norm=25.858, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.921e-04, train_time=2.538 +[gpuc01:0/16] 2024-01-24 11:21:09,764 (trainer:737) INFO: 7epoch:train:15201-15300batch: iter_time=2.082e-04, forward_time=0.117, loss_ctc=61.553, loss_att=59.496, acc=0.675, loss=60.113, backward_time=0.137, grad_norm=24.845, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.920e-04, train_time=2.533 +[gpuc01:0/16] 2024-01-24 11:23:16,606 (trainer:737) INFO: 
7epoch:train:15301-15400batch: iter_time=1.790e-04, forward_time=0.116, loss_ctc=67.340, loss_att=69.080, acc=0.675, loss=68.558, backward_time=0.138, grad_norm=28.878, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.919e-04, train_time=2.537 +[gpuc01:0/16] 2024-01-24 11:25:23,142 (trainer:737) INFO: 7epoch:train:15401-15500batch: iter_time=2.063e-04, forward_time=0.117, loss_ctc=74.376, loss_att=64.904, acc=0.683, loss=67.746, backward_time=0.137, grad_norm=29.089, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.918e-04, train_time=2.530 +[gpuc01:0/16] 2024-01-24 11:27:29,679 (trainer:737) INFO: 7epoch:train:15501-15600batch: iter_time=1.840e-04, forward_time=0.116, loss_ctc=58.586, loss_att=60.571, acc=0.688, loss=59.976, backward_time=0.137, grad_norm=24.015, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.917e-04, train_time=2.531 +[gpuc01:0/16] 2024-01-24 11:29:36,835 (trainer:737) INFO: 7epoch:train:15601-15700batch: iter_time=3.199e-04, forward_time=0.118, loss_ctc=57.373, loss_att=55.089, acc=0.661, loss=55.774, backward_time=0.137, grad_norm=24.868, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.067, optim0_lr0=3.916e-04, train_time=2.543 +[gpuc01:0/16] 2024-01-24 11:31:43,904 (trainer:737) INFO: 7epoch:train:15701-15800batch: iter_time=2.521e-04, forward_time=0.119, loss_ctc=65.601, loss_att=60.334, acc=0.672, loss=61.914, backward_time=0.137, grad_norm=29.647, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.068, optim0_lr0=3.915e-04, train_time=2.541 +[gpuc01:0/16] 2024-01-24 11:33:51,119 (trainer:737) INFO: 7epoch:train:15801-15900batch: iter_time=2.590e-04, forward_time=0.116, loss_ctc=69.319, loss_att=63.116, acc=0.683, loss=64.977, backward_time=0.138, grad_norm=26.411, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.068, optim0_lr0=3.914e-04, train_time=2.544 +[gpuc01:0/16] 2024-01-24 11:35:58,069 (trainer:737) INFO: 7epoch:train:15901-16000batch: 
iter_time=2.432e-04, forward_time=0.117, loss_ctc=53.673, loss_att=55.179, acc=0.686, loss=54.727, backward_time=0.138, grad_norm=22.711, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.068, optim0_lr0=3.913e-04, train_time=2.539 +[gpuc01:0/16] 2024-01-24 11:38:05,346 (trainer:737) INFO: 7epoch:train:16001-16100batch: iter_time=2.613e-04, forward_time=0.117, loss_ctc=58.776, loss_att=63.857, acc=0.669, loss=62.333, backward_time=0.138, grad_norm=26.039, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.912e-04, train_time=2.545 +[gpuc01:0/16] 2024-01-24 11:40:12,769 (trainer:737) INFO: 7epoch:train:16101-16200batch: iter_time=2.661e-04, forward_time=0.116, loss_ctc=56.686, loss_att=56.945, acc=0.685, loss=56.868, backward_time=0.137, grad_norm=22.830, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.911e-04, train_time=2.548 +[gpuc01:0/16] 2024-01-24 11:42:19,839 (trainer:737) INFO: 7epoch:train:16201-16300batch: iter_time=2.664e-04, forward_time=0.117, loss_ctc=59.909, loss_att=55.306, acc=0.698, loss=56.686, backward_time=0.138, grad_norm=24.935, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.910e-04, train_time=2.541 +[gpuc01:0/16] 2024-01-24 11:44:27,749 (trainer:737) INFO: 7epoch:train:16301-16400batch: iter_time=2.602e-04, forward_time=0.118, loss_ctc=75.921, loss_att=73.901, acc=0.660, loss=74.507, backward_time=0.139, grad_norm=32.910, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.909e-04, train_time=2.558 +[gpuc01:0/16] 2024-01-24 11:46:35,840 (trainer:737) INFO: 7epoch:train:16401-16500batch: iter_time=2.842e-04, forward_time=0.117, loss_ctc=59.754, loss_att=69.726, acc=0.656, loss=66.734, backward_time=0.138, grad_norm=26.782, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.908e-04, train_time=2.562 +[gpuc01:0/16] 2024-01-24 11:48:42,796 (trainer:737) INFO: 7epoch:train:16501-16600batch: iter_time=2.779e-04, forward_time=0.117, 
loss_ctc=60.420, loss_att=53.801, acc=0.686, loss=55.787, backward_time=0.138, grad_norm=28.075, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.907e-04, train_time=2.539 +[gpuc01:0/16] 2024-01-24 11:50:49,483 (trainer:737) INFO: 7epoch:train:16601-16700batch: iter_time=3.008e-04, forward_time=0.117, loss_ctc=67.986, loss_att=79.345, acc=0.654, loss=75.937, backward_time=0.138, grad_norm=30.741, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.906e-04, train_time=2.534 +[gpuc01:0/16] 2024-01-24 11:52:56,324 (trainer:737) INFO: 7epoch:train:16701-16800batch: iter_time=2.636e-04, forward_time=0.117, loss_ctc=66.181, loss_att=67.265, acc=0.690, loss=66.940, backward_time=0.139, grad_norm=27.789, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.905e-04, train_time=2.537 +[gpuc01:0/16] 2024-01-24 11:55:03,979 (trainer:737) INFO: 7epoch:train:16801-16900batch: iter_time=2.838e-04, forward_time=0.117, loss_ctc=66.606, loss_att=64.651, acc=0.671, loss=65.237, backward_time=0.138, grad_norm=25.165, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.904e-04, train_time=2.553 +[gpuc01:0/16] 2024-01-24 11:57:11,075 (trainer:737) INFO: 7epoch:train:16901-17000batch: iter_time=2.637e-04, forward_time=0.117, loss_ctc=59.762, loss_att=64.010, acc=0.682, loss=62.735, backward_time=0.138, grad_norm=23.949, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.903e-04, train_time=2.542 +[gpuc01:0/16] 2024-01-24 11:59:18,158 (trainer:737) INFO: 7epoch:train:17001-17100batch: iter_time=2.793e-04, forward_time=0.117, loss_ctc=57.903, loss_att=62.381, acc=0.683, loss=61.038, backward_time=0.138, grad_norm=24.727, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.902e-04, train_time=2.541 +[gpuc01:0/16] 2024-01-24 12:01:25,371 (trainer:737) INFO: 7epoch:train:17101-17200batch: iter_time=2.928e-04, forward_time=0.117, loss_ctc=59.134, loss_att=66.166, 
acc=0.669, loss=64.057, backward_time=0.139, grad_norm=24.274, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.901e-04, train_time=2.544 +[gpuc01:0/16] 2024-01-24 12:03:32,191 (trainer:737) INFO: 7epoch:train:17201-17300batch: iter_time=2.107e-04, forward_time=0.119, loss_ctc=67.301, loss_att=53.519, acc=0.696, loss=57.653, backward_time=0.138, grad_norm=28.211, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.900e-04, train_time=2.536 +[gpuc01:0/16] 2024-01-24 12:05:39,131 (trainer:737) INFO: 7epoch:train:17301-17400batch: iter_time=1.699e-04, forward_time=0.119, loss_ctc=64.176, loss_att=63.886, acc=0.661, loss=63.973, backward_time=0.139, grad_norm=32.280, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.899e-04, train_time=2.539 +[gpuc01:0/16] 2024-01-24 12:07:47,233 (trainer:737) INFO: 7epoch:train:17401-17500batch: iter_time=1.826e-04, forward_time=0.120, loss_ctc=66.388, loss_att=65.513, acc=0.679, loss=65.776, backward_time=0.138, grad_norm=28.974, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.898e-04, train_time=2.562 +[gpuc01:0/16] 2024-01-24 12:07:50,425 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpuc01:0/16] 2024-01-24 12:08:09,749 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 12:08:13,729 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 12:08:13,730 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpuc01:0/16] 2024-01-24 12:08:13,736 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 12:12:14,116 (trainer:737) INFO: 7epoch:train:17501-17600batch: iter_time=1.402, forward_time=0.117, loss_ctc=65.274, loss_att=66.017, acc=0.663, loss=65.794, backward_time=0.137, grad_norm=29.420, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.897e-04, train_time=5.337 +[gpuc01:0/16] 2024-01-24 12:14:20,687 (trainer:737) INFO: 7epoch:train:17601-17700batch: iter_time=2.312e-04, forward_time=0.117, loss_ctc=60.171, loss_att=57.839, acc=0.692, loss=58.538, backward_time=0.137, grad_norm=23.648, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.896e-04, train_time=2.531 +[gpuc01:0/16] 2024-01-24 12:16:27,545 (trainer:737) INFO: 7epoch:train:17701-17800batch: iter_time=2.127e-04, forward_time=0.117, loss_ctc=61.088, loss_att=56.824, acc=0.680, loss=58.103, backward_time=0.138, grad_norm=23.759, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.895e-04, train_time=2.537 +[gpuc01:0/16] 2024-01-24 12:18:33,326 (trainer:737) INFO: 
7epoch:train:17801-17900batch: iter_time=1.847e-04, forward_time=0.115, loss_ctc=67.363, loss_att=66.673, acc=0.688, loss=66.880, backward_time=0.138, grad_norm=26.978, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.894e-04, train_time=2.515 +[gpuc01:0/16] 2024-01-24 12:20:39,057 (trainer:737) INFO: 7epoch:train:17901-18000batch: iter_time=1.975e-04, forward_time=0.116, loss_ctc=74.580, loss_att=64.678, acc=0.688, loss=67.649, backward_time=0.137, grad_norm=29.521, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.893e-04, train_time=2.514 +[gpuc01:0/16] 2024-01-24 12:22:44,967 (trainer:737) INFO: 7epoch:train:18001-18100batch: iter_time=2.323e-04, forward_time=0.117, loss_ctc=58.393, loss_att=59.609, acc=0.692, loss=59.244, backward_time=0.137, grad_norm=24.203, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.892e-04, train_time=2.518 +[gpuc01:0/16] 2024-01-24 12:24:50,767 (trainer:737) INFO: 7epoch:train:18101-18200batch: iter_time=2.286e-04, forward_time=0.114, loss_ctc=57.775, loss_att=53.779, acc=0.665, loss=54.977, backward_time=0.136, grad_norm=23.700, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.891e-04, train_time=2.516 +[gpuc01:0/16] 2024-01-24 12:26:56,967 (trainer:737) INFO: 7epoch:train:18201-18300batch: iter_time=2.369e-04, forward_time=0.115, loss_ctc=66.086, loss_att=60.566, acc=0.671, loss=62.222, backward_time=0.137, grad_norm=27.896, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.890e-04, train_time=2.524 +[gpuc01:0/16] 2024-01-24 12:29:02,985 (trainer:737) INFO: 7epoch:train:18301-18400batch: iter_time=2.020e-04, forward_time=0.115, loss_ctc=69.201, loss_att=62.976, acc=0.683, loss=64.844, backward_time=0.138, grad_norm=26.517, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.889e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 12:31:08,570 (trainer:737) INFO: 7epoch:train:18401-18500batch: 
iter_time=2.357e-04, forward_time=0.114, loss_ctc=54.010, loss_att=56.024, acc=0.683, loss=55.420, backward_time=0.137, grad_norm=22.347, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.888e-04, train_time=2.511 +[gpuc01:0/16] 2024-01-24 12:33:14,038 (trainer:737) INFO: 7epoch:train:18501-18600batch: iter_time=2.448e-04, forward_time=0.114, loss_ctc=58.363, loss_att=62.397, acc=0.673, loss=61.186, backward_time=0.137, grad_norm=24.608, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.887e-04, train_time=2.509 +[gpuc01:0/16] 2024-01-24 12:35:19,844 (trainer:737) INFO: 7epoch:train:18601-18700batch: iter_time=2.471e-04, forward_time=0.115, loss_ctc=57.125, loss_att=56.612, acc=0.685, loss=56.766, backward_time=0.136, grad_norm=22.526, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.886e-04, train_time=2.516 +[gpuc01:0/16] 2024-01-24 12:37:25,784 (trainer:737) INFO: 7epoch:train:18701-18800batch: iter_time=2.236e-04, forward_time=0.115, loss_ctc=58.546, loss_att=53.851, acc=0.702, loss=55.260, backward_time=0.137, grad_norm=23.102, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.885e-04, train_time=2.519 +[gpuc01:0/16] 2024-01-24 12:39:31,964 (trainer:737) INFO: 7epoch:train:18801-18900batch: iter_time=2.100e-04, forward_time=0.116, loss_ctc=74.678, loss_att=71.611, acc=0.665, loss=72.531, backward_time=0.137, grad_norm=39.268, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.884e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 12:41:38,212 (trainer:737) INFO: 7epoch:train:18901-19000batch: iter_time=2.067e-04, forward_time=0.115, loss_ctc=58.079, loss_att=67.013, acc=0.665, loss=64.333, backward_time=0.137, grad_norm=26.047, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.883e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 12:43:44,298 (trainer:737) INFO: 7epoch:train:19001-19100batch: iter_time=1.838e-04, forward_time=0.114, 
loss_ctc=58.575, loss_att=51.195, acc=0.694, loss=53.409, backward_time=0.136, grad_norm=26.762, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.882e-04, train_time=2.521 +[gpuc01:0/16] 2024-01-24 12:45:50,640 (trainer:737) INFO: 7epoch:train:19101-19200batch: iter_time=2.130e-04, forward_time=0.116, loss_ctc=66.804, loss_att=78.851, acc=0.655, loss=75.237, backward_time=0.137, grad_norm=29.184, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.881e-04, train_time=2.527 +[gpuc01:0/16] 2024-01-24 12:47:56,570 (trainer:737) INFO: 7epoch:train:19201-19300batch: iter_time=1.774e-04, forward_time=0.115, loss_ctc=64.988, loss_att=63.641, acc=0.700, loss=64.045, backward_time=0.138, grad_norm=26.245, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.880e-04, train_time=2.518 +[gpuc01:0/16] 2024-01-24 12:50:02,536 (trainer:737) INFO: 7epoch:train:19301-19400batch: iter_time=2.022e-04, forward_time=0.114, loss_ctc=65.790, loss_att=63.599, acc=0.675, loss=64.256, backward_time=0.137, grad_norm=25.529, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.879e-04, train_time=2.519 +[gpuc01:0/16] 2024-01-24 12:52:09,008 (trainer:737) INFO: 7epoch:train:19401-19500batch: iter_time=2.046e-04, forward_time=0.118, loss_ctc=58.821, loss_att=61.372, acc=0.688, loss=60.607, backward_time=0.137, grad_norm=24.842, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.878e-04, train_time=2.529 +[gpuc01:0/16] 2024-01-24 12:54:15,080 (trainer:737) INFO: 7epoch:train:19501-19600batch: iter_time=2.021e-04, forward_time=0.113, loss_ctc=56.704, loss_att=60.338, acc=0.693, loss=59.248, backward_time=0.137, grad_norm=25.359, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.877e-04, train_time=2.521 +[gpuc01:0/16] 2024-01-24 12:56:21,115 (trainer:737) INFO: 7epoch:train:19601-19700batch: iter_time=1.989e-04, forward_time=0.115, loss_ctc=59.328, loss_att=64.968, 
acc=0.674, loss=63.276, backward_time=0.137, grad_norm=25.282, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.876e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 12:58:27,283 (trainer:737) INFO: 7epoch:train:19701-19800batch: iter_time=2.089e-04, forward_time=0.116, loss_ctc=67.283, loss_att=52.678, acc=0.697, loss=57.059, backward_time=0.137, grad_norm=32.040, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.068, optim0_lr0=3.875e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 13:00:33,402 (trainer:737) INFO: 7epoch:train:19801-19900batch: iter_time=1.751e-04, forward_time=0.115, loss_ctc=64.075, loss_att=63.414, acc=0.663, loss=63.612, backward_time=0.137, grad_norm=28.867, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.874e-04, train_time=2.522 +[gpuc01:0/16] 2024-01-24 13:02:39,661 (trainer:737) INFO: 7epoch:train:19901-20000batch: iter_time=1.539e-04, forward_time=0.115, loss_ctc=64.946, loss_att=65.224, acc=0.678, loss=65.141, backward_time=0.137, grad_norm=27.913, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.067, optim0_lr0=3.873e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 13:02:41,819 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpuc01:0/16] 2024-01-24 13:03:01,562 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 13:03:05,229 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 13:03:05,229 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpuc01:0/16] 2024-01-24 13:03:05,235 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 13:07:07,825 (trainer:737) INFO: 7epoch:train:20001-20100batch: iter_time=1.394, forward_time=0.117, loss_ctc=64.506, loss_att=64.507, acc=0.670, loss=64.507, backward_time=0.138, grad_norm=30.053, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.872e-04, train_time=5.363 +[gpuc01:0/16] 2024-01-24 13:09:14,192 (trainer:737) INFO: 7epoch:train:20101-20200batch: iter_time=2.320e-04, forward_time=0.117, loss_ctc=59.349, loss_att=57.603, acc=0.695, loss=58.127, backward_time=0.137, grad_norm=23.875, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.872e-04, train_time=2.527 +[gpuc01:0/16] 2024-01-24 13:11:20,769 (trainer:737) INFO: 7epoch:train:20201-20300batch: iter_time=2.178e-04, forward_time=0.116, loss_ctc=60.563, loss_att=55.733, acc=0.685, loss=57.182, backward_time=0.137, grad_norm=25.244, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.871e-04, train_time=2.531 +[gpuc01:0/16] 2024-01-24 13:13:27,874 (trainer:737) INFO: 
7epoch:train:20301-20400batch: iter_time=2.239e-04, forward_time=0.118, loss_ctc=67.294, loss_att=67.957, acc=0.683, loss=67.758, backward_time=0.138, grad_norm=28.023, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.870e-04, train_time=2.542 +[gpuc01:0/16] 2024-01-24 13:15:34,496 (trainer:737) INFO: 7epoch:train:20401-20500batch: iter_time=1.649e-04, forward_time=0.119, loss_ctc=72.939, loss_att=63.485, acc=0.690, loss=66.321, backward_time=0.137, grad_norm=28.919, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.869e-04, train_time=2.532 +[gpuc01:0/16] 2024-01-24 13:17:41,338 (trainer:737) INFO: 7epoch:train:20501-20600batch: iter_time=2.056e-04, forward_time=0.117, loss_ctc=58.493, loss_att=59.487, acc=0.693, loss=59.189, backward_time=0.137, grad_norm=22.774, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.868e-04, train_time=2.537 +[gpuc01:0/16] 2024-01-24 13:19:48,144 (trainer:737) INFO: 7epoch:train:20601-20700batch: iter_time=2.088e-04, forward_time=0.116, loss_ctc=57.245, loss_att=53.641, acc=0.667, loss=54.722, backward_time=0.137, grad_norm=22.749, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.867e-04, train_time=2.536 +[gpuc01:0/16] 2024-01-24 13:21:55,006 (trainer:737) INFO: 7epoch:train:20701-20800batch: iter_time=2.244e-04, forward_time=0.116, loss_ctc=65.049, loss_att=59.274, acc=0.678, loss=61.006, backward_time=0.137, grad_norm=26.684, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.866e-04, train_time=2.537 +[gpuc01:0/16] 2024-01-24 13:24:01,675 (trainer:737) INFO: 7epoch:train:20801-20900batch: iter_time=2.273e-04, forward_time=0.117, loss_ctc=67.200, loss_att=61.063, acc=0.692, loss=62.904, backward_time=0.137, grad_norm=24.705, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.865e-04, train_time=2.533 +[gpuc01:0/16] 2024-01-24 13:26:08,230 (trainer:737) INFO: 7epoch:train:20901-21000batch: 
iter_time=2.399e-04, forward_time=0.117, loss_ctc=53.418, loss_att=54.551, acc=0.688, loss=54.211, backward_time=0.137, grad_norm=22.750, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.864e-04, train_time=2.531 +[gpuc01:0/16] 2024-01-24 13:28:15,337 (trainer:737) INFO: 7epoch:train:21001-21100batch: iter_time=2.291e-04, forward_time=0.120, loss_ctc=57.213, loss_att=61.338, acc=0.674, loss=60.101, backward_time=0.137, grad_norm=23.252, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.863e-04, train_time=2.542 +[gpuc01:0/16] 2024-01-24 13:30:21,863 (trainer:737) INFO: 7epoch:train:21101-21200batch: iter_time=2.192e-04, forward_time=0.116, loss_ctc=56.268, loss_att=55.142, acc=0.691, loss=55.479, backward_time=0.137, grad_norm=22.084, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.862e-04, train_time=2.530 +[gpuc01:0/16] 2024-01-24 13:32:28,275 (trainer:737) INFO: 7epoch:train:21201-21300batch: iter_time=2.276e-04, forward_time=0.116, loss_ctc=58.508, loss_att=53.060, acc=0.704, loss=54.694, backward_time=0.138, grad_norm=24.009, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.861e-04, train_time=2.528 +[gpuc01:0/16] 2024-01-24 13:34:34,793 (trainer:737) INFO: 7epoch:train:21301-21400batch: iter_time=2.218e-04, forward_time=0.117, loss_ctc=74.579, loss_att=70.987, acc=0.666, loss=72.065, backward_time=0.139, grad_norm=30.705, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.860e-04, train_time=2.530 +[gpuc01:0/16] 2024-01-24 13:36:42,163 (trainer:737) INFO: 7epoch:train:21401-21500batch: iter_time=2.300e-04, forward_time=0.117, loss_ctc=57.867, loss_att=66.654, acc=0.665, loss=64.018, backward_time=0.138, grad_norm=25.543, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.859e-04, train_time=2.547 +[gpuc01:0/16] 2024-01-24 13:38:48,615 (trainer:737) INFO: 7epoch:train:21501-21600batch: iter_time=2.526e-04, forward_time=0.117, 
loss_ctc=59.482, loss_att=52.764, acc=0.690, loss=54.780, backward_time=0.137, grad_norm=28.446, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.858e-04, train_time=2.529 +[gpuc01:0/16] 2024-01-24 13:40:55,551 (trainer:737) INFO: 7epoch:train:21601-21700batch: iter_time=2.368e-04, forward_time=0.117, loss_ctc=66.974, loss_att=77.340, acc=0.659, loss=74.230, backward_time=0.138, grad_norm=32.327, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.857e-04, train_time=2.538 +[gpuc01:0/16] 2024-01-24 13:43:02,147 (trainer:737) INFO: 7epoch:train:21701-21800batch: iter_time=2.146e-04, forward_time=0.117, loss_ctc=65.432, loss_att=64.784, acc=0.696, loss=64.979, backward_time=0.139, grad_norm=28.003, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.856e-04, train_time=2.532 +[gpuc01:0/16] 2024-01-24 13:45:09,475 (trainer:737) INFO: 7epoch:train:21801-21900batch: iter_time=2.058e-04, forward_time=0.117, loss_ctc=64.828, loss_att=63.602, acc=0.675, loss=63.970, backward_time=0.138, grad_norm=24.272, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.855e-04, train_time=2.546 +[gpuc01:0/16] 2024-01-24 13:47:16,267 (trainer:737) INFO: 7epoch:train:21901-22000batch: iter_time=2.132e-04, forward_time=0.117, loss_ctc=58.771, loss_att=61.842, acc=0.687, loss=60.921, backward_time=0.138, grad_norm=22.822, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.854e-04, train_time=2.536 +[gpuc01:0/16] 2024-01-24 13:49:22,963 (trainer:737) INFO: 7epoch:train:22001-22100batch: iter_time=2.022e-04, forward_time=0.117, loss_ctc=56.140, loss_att=62.741, acc=0.686, loss=60.761, backward_time=0.137, grad_norm=24.601, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.853e-04, train_time=2.534 +[gpuc01:0/16] 2024-01-24 13:51:30,001 (trainer:737) INFO: 7epoch:train:22101-22200batch: iter_time=1.928e-04, forward_time=0.117, loss_ctc=58.188, loss_att=65.019, 
acc=0.674, loss=62.969, backward_time=0.138, grad_norm=25.435, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.852e-04, train_time=2.541 +[gpuc01:0/16] 2024-01-24 13:53:37,898 (trainer:737) INFO: 7epoch:train:22201-22300batch: iter_time=1.876e-04, forward_time=0.117, loss_ctc=67.148, loss_att=51.832, acc=0.700, loss=56.427, backward_time=0.138, grad_norm=30.177, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.851e-04, train_time=2.558 +[gpuc01:0/16] 2024-01-24 13:55:45,013 (trainer:737) INFO: 7epoch:train:22301-22400batch: iter_time=1.966e-04, forward_time=0.117, loss_ctc=62.350, loss_att=62.600, acc=0.666, loss=62.525, backward_time=0.137, grad_norm=28.947, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.850e-04, train_time=2.542 +[gpuc01:0/16] 2024-01-24 13:57:52,245 (trainer:737) INFO: 7epoch:train:22401-22500batch: iter_time=1.799e-04, forward_time=0.117, loss_ctc=65.068, loss_att=65.232, acc=0.682, loss=65.183, backward_time=0.138, grad_norm=28.815, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.849e-04, train_time=2.544 +[gpuc01:0/16] 2024-01-24 13:57:54,784 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpuc01:0/16] 2024-01-24 13:58:13,955 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 13:58:17,662 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 13:58:17,662 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpuc01:0/16] 2024-01-24 13:58:17,669 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 14:02:21,701 (trainer:737) INFO: 7epoch:train:22501-22600batch: iter_time=1.405, forward_time=0.116, loss_ctc=63.331, loss_att=65.301, acc=0.663, loss=64.710, backward_time=0.138, grad_norm=30.728, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.848e-04, train_time=5.389 +[gpuc01:0/16] 2024-01-24 14:04:27,879 (trainer:737) INFO: 7epoch:train:22601-22700batch: iter_time=1.956e-04, forward_time=0.116, loss_ctc=59.043, loss_att=58.100, acc=0.689, loss=58.383, backward_time=0.137, grad_norm=24.267, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.848e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 14:06:34,088 (trainer:737) INFO: 7epoch:train:22701-22800batch: iter_time=2.023e-04, forward_time=0.119, loss_ctc=60.369, loss_att=57.812, acc=0.665, loss=58.579, backward_time=0.137, grad_norm=24.004, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.847e-04, train_time=2.524 +[gpuc01:0/16] 2024-01-24 14:08:40,273 (trainer:737) INFO: 
7epoch:train:22801-22900batch: iter_time=2.233e-04, forward_time=0.117, loss_ctc=66.266, loss_att=66.641, acc=0.679, loss=66.529, backward_time=0.138, grad_norm=26.488, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.846e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 14:10:46,658 (trainer:737) INFO: 7epoch:train:22901-23000batch: iter_time=2.192e-04, forward_time=0.117, loss_ctc=72.651, loss_att=62.892, acc=0.689, loss=65.820, backward_time=0.137, grad_norm=27.833, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.845e-04, train_time=2.527 +[gpuc01:0/16] 2024-01-24 14:12:53,854 (trainer:737) INFO: 7epoch:train:23001-23100batch: iter_time=2.031e-04, forward_time=0.117, loss_ctc=57.082, loss_att=57.978, acc=0.694, loss=57.709, backward_time=0.138, grad_norm=22.495, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.844e-04, train_time=2.544 +[gpuc01:0/16] 2024-01-24 14:15:00,078 (trainer:737) INFO: 7epoch:train:23101-23200batch: iter_time=2.005e-04, forward_time=0.116, loss_ctc=57.480, loss_att=53.237, acc=0.662, loss=54.510, backward_time=0.136, grad_norm=24.959, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.843e-04, train_time=2.524 +[gpuc01:0/16] 2024-01-24 14:17:06,369 (trainer:737) INFO: 7epoch:train:23201-23300batch: iter_time=2.240e-04, forward_time=0.117, loss_ctc=63.963, loss_att=56.821, acc=0.677, loss=58.964, backward_time=0.137, grad_norm=26.459, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.842e-04, train_time=2.526 +[gpuc01:0/16] 2024-01-24 14:19:12,425 (trainer:737) INFO: 7epoch:train:23301-23400batch: iter_time=2.268e-04, forward_time=0.117, loss_ctc=67.169, loss_att=61.777, acc=0.684, loss=63.394, backward_time=0.138, grad_norm=28.356, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.841e-04, train_time=2.521 +[gpuc01:0/16] 2024-01-24 14:21:18,906 (trainer:737) INFO: 7epoch:train:23401-23500batch: 
iter_time=2.423e-04, forward_time=0.116, loss_ctc=53.458, loss_att=54.939, acc=0.677, loss=54.495, backward_time=0.137, grad_norm=23.175, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.840e-04, train_time=2.529 +[gpuc01:0/16] 2024-01-24 14:23:24,904 (trainer:737) INFO: 7epoch:train:23501-23600batch: iter_time=1.856e-04, forward_time=0.117, loss_ctc=57.068, loss_att=60.439, acc=0.674, loss=59.427, backward_time=0.137, grad_norm=23.988, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.839e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 14:25:30,437 (trainer:737) INFO: 7epoch:train:23601-23700batch: iter_time=1.860e-04, forward_time=0.116, loss_ctc=55.623, loss_att=54.786, acc=0.680, loss=55.037, backward_time=0.137, grad_norm=23.740, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.838e-04, train_time=2.510 +[gpuc01:0/16] 2024-01-24 14:27:36,597 (trainer:737) INFO: 7epoch:train:23701-23800batch: iter_time=1.889e-04, forward_time=0.116, loss_ctc=57.538, loss_att=54.049, acc=0.690, loss=55.096, backward_time=0.137, grad_norm=24.816, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.837e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 14:29:42,576 (trainer:737) INFO: 7epoch:train:23801-23900batch: iter_time=1.889e-04, forward_time=0.117, loss_ctc=74.158, loss_att=71.585, acc=0.653, loss=72.357, backward_time=0.138, grad_norm=31.063, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.067, optim0_lr0=3.836e-04, train_time=2.519 +[gpuc01:0/16] 2024-01-24 14:31:49,610 (trainer:737) INFO: 7epoch:train:23901-24000batch: iter_time=1.785e-04, forward_time=0.116, loss_ctc=57.316, loss_att=67.448, acc=0.651, loss=64.408, backward_time=0.137, grad_norm=26.236, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.068, optim0_lr0=3.835e-04, train_time=2.540 +[gpuc01:0/16] 2024-01-24 14:33:55,194 (trainer:737) INFO: 7epoch:train:24001-24100batch: iter_time=1.974e-04, forward_time=0.115, 
loss_ctc=58.978, loss_att=53.472, acc=0.681, loss=55.124, backward_time=0.137, grad_norm=27.630, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.834e-04, train_time=2.511 +[gpuc01:0/16] 2024-01-24 14:36:01,174 (trainer:737) INFO: 7epoch:train:24101-24200batch: iter_time=1.872e-04, forward_time=0.116, loss_ctc=65.489, loss_att=71.554, acc=0.661, loss=69.734, backward_time=0.138, grad_norm=31.009, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.833e-04, train_time=2.519 +[gpuc01:0/16] 2024-01-24 14:38:07,166 (trainer:737) INFO: 7epoch:train:24201-24300batch: iter_time=1.785e-04, forward_time=0.117, loss_ctc=64.014, loss_att=62.086, acc=0.699, loss=62.664, backward_time=0.138, grad_norm=25.946, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.832e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 14:40:12,860 (trainer:737) INFO: 7epoch:train:24301-24400batch: iter_time=2.033e-04, forward_time=0.116, loss_ctc=64.981, loss_att=62.928, acc=0.669, loss=63.544, backward_time=0.137, grad_norm=25.888, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.832e-04, train_time=2.514 +[gpuc01:0/16] 2024-01-24 14:42:18,785 (trainer:737) INFO: 7epoch:train:24401-24500batch: iter_time=2.108e-04, forward_time=0.116, loss_ctc=58.363, loss_att=61.167, acc=0.675, loss=60.326, backward_time=0.137, grad_norm=23.633, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.831e-04, train_time=2.518 +[gpuc01:0/16] 2024-01-24 14:44:25,043 (trainer:737) INFO: 7epoch:train:24501-24600batch: iter_time=2.148e-04, forward_time=0.116, loss_ctc=55.831, loss_att=60.711, acc=0.692, loss=59.247, backward_time=0.137, grad_norm=24.636, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.830e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 14:46:30,975 (trainer:737) INFO: 7epoch:train:24601-24700batch: iter_time=2.083e-04, forward_time=0.116, loss_ctc=57.776, loss_att=63.447, 
acc=0.668, loss=61.745, backward_time=0.137, grad_norm=23.621, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.829e-04, train_time=2.518 +[gpuc01:0/16] 2024-01-24 14:48:36,994 (trainer:737) INFO: 7epoch:train:24701-24800batch: iter_time=2.262e-04, forward_time=0.115, loss_ctc=65.255, loss_att=50.611, acc=0.693, loss=55.005, backward_time=0.137, grad_norm=29.550, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.828e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 14:50:43,833 (trainer:737) INFO: 7epoch:train:24801-24900batch: iter_time=2.066e-04, forward_time=0.116, loss_ctc=62.463, loss_att=62.512, acc=0.663, loss=62.497, backward_time=0.138, grad_norm=29.146, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.827e-04, train_time=2.537 +[gpuc01:0/16] 2024-01-24 14:52:50,333 (trainer:737) INFO: 7epoch:train:24901-25000batch: iter_time=1.947e-04, forward_time=0.117, loss_ctc=64.281, loss_att=62.655, acc=0.668, loss=63.143, backward_time=0.137, grad_norm=28.472, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.826e-04, train_time=2.530 +[gpuc01:0/16] 2024-01-24 14:52:52,492 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpuc01:0/16] 2024-01-24 14:53:11,866 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 14:53:15,470 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 14:53:15,470 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpuc01:0/16] 2024-01-24 14:53:15,476 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 14:57:17,989 (trainer:737) INFO: 7epoch:train:25001-25100batch: iter_time=1.388, forward_time=0.116, loss_ctc=63.892, loss_att=65.764, acc=0.666, loss=65.202, backward_time=0.138, grad_norm=30.321, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.825e-04, train_time=5.353 +[gpuc01:0/16] 2024-01-24 14:59:23,413 (trainer:737) INFO: 7epoch:train:25101-25200batch: iter_time=2.456e-04, forward_time=0.116, loss_ctc=59.209, loss_att=57.156, acc=0.699, loss=57.772, backward_time=0.137, grad_norm=25.184, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.824e-04, train_time=2.508 +[gpuc01:0/16] 2024-01-24 15:01:28,004 (trainer:737) INFO: 7epoch:train:25201-25300batch: iter_time=2.623e-04, forward_time=0.115, loss_ctc=60.336, loss_att=56.390, acc=0.684, loss=57.574, backward_time=0.137, grad_norm=25.175, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.823e-04, train_time=2.492 +[gpuc01:0/16] 2024-01-24 15:03:32,911 (trainer:737) INFO: 
7epoch:train:25301-25400batch: iter_time=2.579e-04, forward_time=0.116, loss_ctc=66.050, loss_att=64.991, acc=0.692, loss=65.308, backward_time=0.138, grad_norm=27.847, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.822e-04, train_time=2.498 +[gpuc01:0/16] 2024-01-24 15:05:37,897 (trainer:737) INFO: 7epoch:train:25401-25500batch: iter_time=2.361e-04, forward_time=0.116, loss_ctc=71.756, loss_att=61.575, acc=0.694, loss=64.630, backward_time=0.138, grad_norm=28.826, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.821e-04, train_time=2.499 +[gpuc01:0/16] 2024-01-24 15:07:43,480 (trainer:737) INFO: 7epoch:train:25501-25600batch: iter_time=2.515e-04, forward_time=0.115, loss_ctc=57.415, loss_att=58.735, acc=0.697, loss=58.339, backward_time=0.137, grad_norm=23.149, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.820e-04, train_time=2.511 +[gpuc01:0/16] 2024-01-24 15:09:49,512 (trainer:737) INFO: 7epoch:train:25601-25700batch: iter_time=2.488e-04, forward_time=0.115, loss_ctc=56.323, loss_att=52.465, acc=0.669, loss=53.623, backward_time=0.136, grad_norm=27.112, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.819e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 15:11:54,485 (trainer:737) INFO: 7epoch:train:25701-25800batch: iter_time=2.131e-04, forward_time=0.116, loss_ctc=64.295, loss_att=59.453, acc=0.677, loss=60.905, backward_time=0.137, grad_norm=30.210, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.818e-04, train_time=2.499 +[gpuc01:0/16] 2024-01-24 15:13:59,799 (trainer:737) INFO: 7epoch:train:25801-25900batch: iter_time=2.267e-04, forward_time=0.116, loss_ctc=67.870, loss_att=62.519, acc=0.690, loss=64.124, backward_time=0.138, grad_norm=27.000, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.818e-04, train_time=2.506 +[gpuc01:0/16] 2024-01-24 15:16:05,110 (trainer:737) INFO: 7epoch:train:25901-26000batch: 
iter_time=2.259e-04, forward_time=0.115, loss_ctc=52.834, loss_att=53.878, acc=0.689, loss=53.565, backward_time=0.137, grad_norm=21.404, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.817e-04, train_time=2.506 +[gpuc01:0/16] 2024-01-24 15:18:10,105 (trainer:737) INFO: 7epoch:train:26001-26100batch: iter_time=2.301e-04, forward_time=0.116, loss_ctc=56.199, loss_att=60.674, acc=0.679, loss=59.332, backward_time=0.137, grad_norm=23.001, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.816e-04, train_time=2.500 +[gpuc01:0/16] 2024-01-24 15:20:14,995 (trainer:737) INFO: 7epoch:train:26101-26200batch: iter_time=2.394e-04, forward_time=0.115, loss_ctc=55.730, loss_att=55.015, acc=0.690, loss=55.230, backward_time=0.137, grad_norm=22.531, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.815e-04, train_time=2.498 +[gpuc01:0/16] 2024-01-24 15:22:20,197 (trainer:737) INFO: 7epoch:train:26201-26300batch: iter_time=2.536e-04, forward_time=0.115, loss_ctc=57.211, loss_att=52.355, acc=0.707, loss=53.812, backward_time=0.137, grad_norm=25.337, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.814e-04, train_time=2.504 +[gpuc01:0/16] 2024-01-24 15:24:25,257 (trainer:737) INFO: 7epoch:train:26301-26400batch: iter_time=2.439e-04, forward_time=0.116, loss_ctc=74.434, loss_att=71.113, acc=0.666, loss=72.109, backward_time=0.138, grad_norm=43.402, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.813e-04, train_time=2.501 +[gpuc01:0/16] 2024-01-24 15:26:30,629 (trainer:737) INFO: 7epoch:train:26401-26500batch: iter_time=2.255e-04, forward_time=0.115, loss_ctc=57.686, loss_att=66.091, acc=0.670, loss=63.570, backward_time=0.137, grad_norm=27.586, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.812e-04, train_time=2.507 +[gpuc01:0/16] 2024-01-24 15:28:36,295 (trainer:737) INFO: 7epoch:train:26501-26600batch: iter_time=2.371e-04, forward_time=0.116, 
loss_ctc=59.051, loss_att=51.088, acc=0.697, loss=53.477, backward_time=0.137, grad_norm=27.382, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.811e-04, train_time=2.513 +[gpuc01:0/16] 2024-01-24 15:30:41,483 (trainer:737) INFO: 7epoch:train:26601-26700batch: iter_time=2.313e-04, forward_time=0.115, loss_ctc=64.201, loss_att=75.840, acc=0.663, loss=72.348, backward_time=0.138, grad_norm=31.402, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.810e-04, train_time=2.504 +[gpuc01:0/16] 2024-01-24 15:32:46,763 (trainer:737) INFO: 7epoch:train:26701-26800batch: iter_time=2.167e-04, forward_time=0.115, loss_ctc=64.542, loss_att=64.325, acc=0.701, loss=64.390, backward_time=0.138, grad_norm=27.131, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.809e-04, train_time=2.505 +[gpuc01:0/16] 2024-01-24 15:34:52,090 (trainer:737) INFO: 7epoch:train:26801-26900batch: iter_time=2.584e-04, forward_time=0.115, loss_ctc=64.766, loss_att=62.343, acc=0.680, loss=63.070, backward_time=0.137, grad_norm=24.646, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.808e-04, train_time=2.506 +[gpuc01:0/16] 2024-01-24 15:36:57,002 (trainer:737) INFO: 7epoch:train:26901-27000batch: iter_time=2.546e-04, forward_time=0.116, loss_ctc=58.203, loss_att=60.411, acc=0.692, loss=59.749, backward_time=0.137, grad_norm=25.580, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.807e-04, train_time=2.498 +[gpuc01:0/16] 2024-01-24 15:39:03,352 (trainer:737) INFO: 7epoch:train:27001-27100batch: iter_time=2.332e-04, forward_time=0.115, loss_ctc=55.620, loss_att=59.438, acc=0.696, loss=58.293, backward_time=0.137, grad_norm=25.862, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.806e-04, train_time=2.527 +[gpuc01:0/16] 2024-01-24 15:41:08,841 (trainer:737) INFO: 7epoch:train:27101-27200batch: iter_time=2.549e-04, forward_time=0.116, loss_ctc=57.387, loss_att=63.540, 
acc=0.678, loss=61.694, backward_time=0.137, grad_norm=24.405, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.806e-04, train_time=2.510 +[gpuc01:0/16] 2024-01-24 15:43:14,073 (trainer:737) INFO: 7epoch:train:27201-27300batch: iter_time=2.265e-04, forward_time=0.116, loss_ctc=66.745, loss_att=51.581, acc=0.703, loss=56.130, backward_time=0.137, grad_norm=29.988, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.805e-04, train_time=2.504 +[gpuc01:0/16] 2024-01-24 15:45:19,229 (trainer:737) INFO: 7epoch:train:27301-27400batch: iter_time=1.965e-04, forward_time=0.116, loss_ctc=61.701, loss_att=61.911, acc=0.669, loss=61.848, backward_time=0.136, grad_norm=26.492, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.804e-04, train_time=2.503 +[gpuc01:0/16] 2024-01-24 15:47:24,351 (trainer:737) INFO: 7epoch:train:27401-27500batch: iter_time=1.631e-04, forward_time=0.115, loss_ctc=63.033, loss_att=62.752, acc=0.686, loss=62.836, backward_time=0.137, grad_norm=26.671, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.803e-04, train_time=2.502 +[gpuc01:0/16] 2024-01-24 15:47:27,554 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpuc01:0/16] 2024-01-24 15:47:46,074 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 15:47:50,034 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 15:47:50,034 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpuc01:0/16] 2024-01-24 15:47:50,041 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 15:51:53,618 (trainer:737) INFO: 7epoch:train:27501-27600batch: iter_time=1.405, forward_time=0.115, loss_ctc=63.783, loss_att=62.860, acc=0.675, loss=63.137, backward_time=0.138, grad_norm=31.575, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.802e-04, train_time=5.385 +[gpuc01:0/16] 2024-01-24 15:53:59,862 (trainer:737) INFO: 7epoch:train:27601-27700batch: iter_time=2.287e-04, forward_time=0.116, loss_ctc=58.338, loss_att=56.150, acc=0.701, loss=56.807, backward_time=0.137, grad_norm=24.387, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.801e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 15:56:06,002 (trainer:737) INFO: 7epoch:train:27701-27800batch: iter_time=2.536e-04, forward_time=0.115, loss_ctc=59.967, loss_att=54.725, acc=0.688, loss=56.298, backward_time=0.137, grad_norm=23.476, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.068, optim0_lr0=3.800e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 15:58:12,162 (trainer:737) INFO: 
7epoch:train:27801-27900batch: iter_time=2.197e-04, forward_time=0.116, loss_ctc=66.015, loss_att=65.465, acc=0.689, loss=65.630, backward_time=0.138, grad_norm=27.745, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.799e-04, train_time=2.523 +[gpuc01:0/16] 2024-01-24 16:00:18,390 (trainer:737) INFO: 7epoch:train:27901-28000batch: iter_time=2.055e-04, forward_time=0.115, loss_ctc=71.140, loss_att=62.166, acc=0.695, loss=64.858, backward_time=0.139, grad_norm=28.398, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.067, optim0_lr0=3.798e-04, train_time=2.524 +[gpuc01:0/16] 2024-01-24 16:02:24,428 (trainer:737) INFO: 7epoch:train:28001-28100batch: iter_time=2.163e-04, forward_time=0.116, loss_ctc=56.700, loss_att=57.517, acc=0.700, loss=57.272, backward_time=0.138, grad_norm=23.478, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.797e-04, train_time=2.521 +[gpuc01:0/16] 2024-01-24 16:04:30,272 (trainer:737) INFO: 7epoch:train:28101-28200batch: iter_time=2.092e-04, forward_time=0.114, loss_ctc=56.311, loss_att=52.118, acc=0.673, loss=53.376, backward_time=0.138, grad_norm=22.917, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.796e-04, train_time=2.517 +[gpuc01:0/16] 2024-01-24 16:06:36,597 (trainer:737) INFO: 7epoch:train:28201-28300batch: iter_time=2.273e-04, forward_time=0.116, loss_ctc=64.664, loss_att=57.463, acc=0.681, loss=59.623, backward_time=0.138, grad_norm=27.550, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.795e-04, train_time=2.526 +[gpuc01:0/16] 2024-01-24 16:08:42,902 (trainer:737) INFO: 7epoch:train:28301-28400batch: iter_time=3.015e-04, forward_time=0.119, loss_ctc=66.226, loss_att=60.638, acc=0.696, loss=62.314, backward_time=0.138, grad_norm=27.357, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.795e-04, train_time=2.526 +[gpuc01:0/16] 2024-01-24 16:10:48,709 (trainer:737) INFO: 7epoch:train:28401-28500batch: 
iter_time=2.163e-04, forward_time=0.120, loss_ctc=53.333, loss_att=53.675, acc=0.694, loss=53.572, backward_time=0.137, grad_norm=22.577, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.794e-04, train_time=2.516 +[gpuc01:0/16] 2024-01-24 16:12:54,827 (trainer:737) INFO: 7epoch:train:28501-28600batch: iter_time=2.224e-04, forward_time=0.122, loss_ctc=56.555, loss_att=61.005, acc=0.677, loss=59.670, backward_time=0.137, grad_norm=22.946, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.793e-04, train_time=2.522 +[gpuc01:0/16] 2024-01-24 16:15:00,817 (trainer:737) INFO: 7epoch:train:28601-28700batch: iter_time=2.218e-04, forward_time=0.119, loss_ctc=55.494, loss_att=54.062, acc=0.695, loss=54.492, backward_time=0.137, grad_norm=24.332, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.792e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 16:17:07,370 (trainer:737) INFO: 7epoch:train:28701-28800batch: iter_time=2.995e-04, forward_time=0.114, loss_ctc=56.846, loss_att=51.890, acc=0.709, loss=53.377, backward_time=0.138, grad_norm=23.617, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.791e-04, train_time=2.531 +[gpuc01:0/16] 2024-01-24 16:19:13,743 (trainer:737) INFO: 7epoch:train:28801-28900batch: iter_time=2.773e-04, forward_time=0.116, loss_ctc=73.620, loss_att=69.733, acc=0.671, loss=70.899, backward_time=0.139, grad_norm=31.302, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.790e-04, train_time=2.527 +[gpuc01:0/16] 2024-01-24 16:21:20,651 (trainer:737) INFO: 7epoch:train:28901-29000batch: iter_time=2.236e-04, forward_time=0.115, loss_ctc=57.057, loss_att=65.601, acc=0.669, loss=63.038, backward_time=0.137, grad_norm=26.144, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.789e-04, train_time=2.538 +[gpuc01:0/16] 2024-01-24 16:23:27,208 (trainer:737) INFO: 7epoch:train:29001-29100batch: iter_time=2.193e-04, forward_time=0.114, 
loss_ctc=59.028, loss_att=51.379, acc=0.695, loss=53.674, backward_time=0.136, grad_norm=26.799, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.788e-04, train_time=2.531 +[gpuc01:0/16] 2024-01-24 16:25:33,858 (trainer:737) INFO: 7epoch:train:29101-29200batch: iter_time=2.158e-04, forward_time=0.115, loss_ctc=64.861, loss_att=75.950, acc=0.664, loss=72.624, backward_time=0.138, grad_norm=28.709, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.787e-04, train_time=2.533 +[gpuc01:0/16] 2024-01-24 16:27:40,298 (trainer:737) INFO: 7epoch:train:29201-29300batch: iter_time=2.112e-04, forward_time=0.115, loss_ctc=63.116, loss_att=62.873, acc=0.702, loss=62.946, backward_time=0.138, grad_norm=26.521, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.786e-04, train_time=2.529 +[gpuc01:0/16] 2024-01-24 16:29:46,422 (trainer:737) INFO: 7epoch:train:29301-29400batch: iter_time=2.165e-04, forward_time=0.115, loss_ctc=64.441, loss_att=62.551, acc=0.680, loss=63.118, backward_time=0.137, grad_norm=24.327, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.066, optim0_lr0=3.785e-04, train_time=2.522 +[gpuc01:0/16] 2024-01-24 16:31:52,911 (trainer:737) INFO: 7epoch:train:29401-29500batch: iter_time=2.188e-04, forward_time=0.114, loss_ctc=57.810, loss_att=60.849, acc=0.692, loss=59.938, backward_time=0.137, grad_norm=23.530, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.785e-04, train_time=2.530 +[gpuc01:0/16] 2024-01-24 16:33:59,168 (trainer:737) INFO: 7epoch:train:29501-29600batch: iter_time=2.348e-04, forward_time=0.114, loss_ctc=55.109, loss_att=60.406, acc=0.692, loss=58.817, backward_time=0.137, grad_norm=26.949, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.784e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 16:36:08,873 (trainer:737) INFO: 7epoch:train:29601-29700batch: iter_time=1.886e-04, forward_time=0.114, loss_ctc=57.684, loss_att=64.400, 
acc=0.677, loss=62.385, backward_time=0.137, grad_norm=27.419, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.783e-04, train_time=2.594 +[gpuc01:0/16] 2024-01-24 16:38:15,477 (trainer:737) INFO: 7epoch:train:29701-29800batch: iter_time=2.075e-04, forward_time=0.114, loss_ctc=65.076, loss_att=50.910, acc=0.707, loss=55.160, backward_time=0.137, grad_norm=31.974, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.782e-04, train_time=2.532 +[gpuc01:0/16] 2024-01-24 16:40:21,479 (trainer:737) INFO: 7epoch:train:29801-29900batch: iter_time=2.110e-04, forward_time=0.114, loss_ctc=62.075, loss_att=61.983, acc=0.668, loss=62.010, backward_time=0.137, grad_norm=30.801, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.781e-04, train_time=2.520 +[gpuc01:0/16] 2024-01-24 16:42:27,759 (trainer:737) INFO: 7epoch:train:29901-30000batch: iter_time=1.955e-04, forward_time=0.114, loss_ctc=63.348, loss_att=62.786, acc=0.688, loss=62.954, backward_time=0.137, grad_norm=28.008, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.780e-04, train_time=2.525 +[gpuc01:0/16] 2024-01-24 17:18:48,977 (trainer:343) INFO: 7epoch results: [train] iter_time=0.067, forward_time=0.117, loss_ctc=63.680, loss_att=62.237, acc=0.673, loss=62.670, backward_time=0.137, grad_norm=26.919, clip=100.000, loss_scale=5.872e+19, optim_step_time=0.067, optim0_lr0=3.925e-04, train_time=2.804, time=11 hours, 41 minutes and 4.29 seconds, total_count=135000, gpu_max_cached_mem_GB=25.098, [valid] loss_ctc=56.140, cer_ctc=0.294, loss_att=53.668, acc=0.580, cer=0.415, wer=1.000, loss=54.410, time=36 minutes and 13.51 seconds, total_count=42039, gpu_max_cached_mem_GB=25.098 +[gpuc01:0/16] 2024-01-24 17:18:58,243 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpuc01:0/16] 2024-01-24 17:18:58,261 (trainer:445) INFO: The model files were removed: 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/2epoch.pth +[gpuc01:0/16] 2024-01-24 17:18:58,261 (trainer:272) INFO: 8/45epoch started. Estimated time to finish: 2 weeks, 6 days and 21 hours +[gpuc01:0/16] 2024-01-24 17:18:58,278 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpuc01:0/16] 2024-01-24 17:19:16,389 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpuc01:0/16] 2024-01-24 17:19:19,781 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpuc01:0/16] 2024-01-24 17:19:19,781 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=38055, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpuc01:0/16] 2024-01-24 17:19:19,787 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=38055, mean=128.0, min=128, max=129 +[gpuc01:0/16] 2024-01-24 17:23:15,657 (trainer:737) INFO: 8epoch:train:1-100batch: iter_time=1.319, forward_time=0.118, loss_ctc=70.696, loss_att=74.686, acc=0.679, loss=73.489, backward_time=0.139, grad_norm=28.237, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.779e-04, train_time=5.147 +[gpuc01:0/16] 2024-01-24 17:25:20,661 (trainer:737) INFO: 8epoch:train:101-200batch: iter_time=1.950e-04, forward_time=0.116, loss_ctc=57.839, loss_att=55.395, acc=0.684, loss=56.128, backward_time=0.138, grad_norm=28.965, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.778e-04, train_time=2.500 +[gpuc01:0/16] 2024-01-24 17:27:25,793 (trainer:737) 
INFO: 8epoch:train:201-300batch: iter_time=1.899e-04, forward_time=0.117, loss_ctc=62.583, loss_att=63.256, acc=0.674, loss=63.054, backward_time=0.139, grad_norm=26.807, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.777e-04, train_time=2.502 +[gpuc01:0/16] 2024-01-24 17:29:31,712 (trainer:737) INFO: 8epoch:train:301-400batch: iter_time=1.965e-04, forward_time=0.117, loss_ctc=62.899, loss_att=69.622, acc=0.675, loss=67.605, backward_time=0.139, grad_norm=27.557, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.776e-04, train_time=2.518 +[gpuc01:0/16] 2024-01-24 17:31:36,818 (trainer:737) INFO: 8epoch:train:401-500batch: iter_time=1.917e-04, forward_time=0.117, loss_ctc=63.634, loss_att=59.670, acc=0.698, loss=60.859, backward_time=0.139, grad_norm=28.627, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.776e-04, train_time=2.502 +[gpuc01:0/16] 2024-01-24 17:33:42,016 (trainer:737) INFO: 8epoch:train:501-600batch: iter_time=1.967e-04, forward_time=0.117, loss_ctc=59.203, loss_att=50.668, acc=0.705, loss=53.228, backward_time=0.138, grad_norm=28.469, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.775e-04, train_time=2.504 +[gpuc01:0/16] 2024-01-24 17:35:47,197 (trainer:737) INFO: 8epoch:train:601-700batch: iter_time=2.039e-04, forward_time=0.116, loss_ctc=63.595, loss_att=64.318, acc=0.664, loss=64.101, backward_time=0.139, grad_norm=25.200, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.774e-04, train_time=2.503 +[gpuc01:0/16] 2024-01-24 17:37:51,926 (trainer:737) INFO: 8epoch:train:701-800batch: iter_time=2.025e-04, forward_time=0.116, loss_ctc=59.763, loss_att=57.125, acc=0.676, loss=57.916, backward_time=0.139, grad_norm=25.372, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.067, optim0_lr0=3.773e-04, train_time=2.494 +[gpuc01:0/16] 2024-01-24 17:39:57,070 (trainer:737) INFO: 8epoch:train:801-900batch: iter_time=2.039e-04, 
forward_time=0.117, loss_ctc=60.454, loss_att=63.483, acc=0.676, loss=62.574, backward_time=0.139, grad_norm=24.788, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.068, optim0_lr0=3.772e-04, train_time=2.503 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2886934.0 ON gpuc01 CANCELLED AT 2024-01-24T17:41:28 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.17.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.17.log new file mode 100644 index 0000000000000000000000000000000000000000..566b493d0c3befcde7122578c68a2d8b6ba23542 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.17.log @@ -0,0 +1,2080 @@ +# Running on gpub052.delta.ncsa.illinois.edu +# Started at Mon Jan 22 05:52:51 CST 2024 +# SLURMD_NODENAME=gpub052 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2878933 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1706097127 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2878933 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[052,054-056]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1705924327 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[052,054-056]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# 
SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login01.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=2868225 +# SLURM_TOPOLOGY_ADDR=ss00.ss11.gpub052 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe 
--valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_318fdf1c-27cd-4320-ad3f-342d16f1467f +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type 
dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner 
none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_318fdf1c-27cd-4320-ad3f-342d16f1467f +_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_318fdf1c-27cd-4320-ad3f-342d16f1467f +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_318fdf1c-27cd-4320-ad3f-342d16f1467f +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_318fdf1c-27cd-4320-ad3f-342d16f1467f +[gpub052:0/16] 2024-01-22 06:04:08,611 (distributed_c10d:319) INFO: Added 
key: store_based_barrier_key:1 to store for rank: 0 +[gpub052:0/16] 2024-01-22 06:04:08,706 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. +[gpub052:0/16] 2024-01-22 06:04:08,824 (s2t:464) INFO: Vocabulary size: 50002 +[gpub052:0/16] 2024-01-22 06:04:32,348 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub052:0/16] 2024-01-22 06:04:32,354 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): 
ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + 
(channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): 
Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): 
Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, 
out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, 
bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): 
GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + 
) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + 
(linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, 
out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): 
Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): 
Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub052:0/16] 2024-01-22 06:04:32,354 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub052:0/16] 2024-01-22 06:04:32,354 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], 
warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub052:0/16] 2024-01-22 06:04:32,356 (abs_task:1245) INFO: Saving the configuration in exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub052:0/16] 2024-01-22 06:04:38,741 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 06:04:39,911 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 06:04:39,911 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub052:0/16] 2024-01-22 06:04:39,938 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 06:05:16,876 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub052:2868421:2868421 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2868421:2868421 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2868421:2868421 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub052:0/16] 2024-01-22 06:05:31,064 (trainer:284) INFO: 3/45epoch started +[gpub052:0/16] 2024-01-22 06:05:31,112 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub052:0/16] 2024-01-22 06:05:49,680 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 06:05:53,155 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 06:05:53,155 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub052:0/16] 2024-01-22 06:05:53,158 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub052:2868422:2868422 [1] NCCL INFO cudaDriverVersion 12020 +gpub052:2868422:2868422 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2868422:2868422 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2868422:2868495 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2868422:2868495 [1] NCCL INFO Using network IB +gpub052:2868422:2868495 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub052:2868422:2868495 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub052:2868422:2868495 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub052:2868422:2868495 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub052:2868422:2868495 [1] NCCL INFO Connected all rings +gpub052:2868422:2868495 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub052:2868422:2868495 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub052:2868422:2868495 [1] NCCL INFO Connected 
all trees +gpub052:2868422:2868495 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub052:2868422:2868495 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2868422:2868495 [1] NCCL INFO comm 0x14e813e0 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub056:538247:538247 [1] NCCL INFO cudaDriverVersion 12020 +gpub056:538247:538247 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:538247:538247 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:538247:538322 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.156<0> +gpub056:538247:538322 [1] NCCL INFO Using network IB +gpub056:538247:538322 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub056:538247:538322 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub056:538247:538322 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub056:538247:538322 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub056:538247:538322 [1] NCCL INFO Connected all rings +gpub056:538247:538322 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub056:538247:538322 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub056:538247:538322 [1] NCCL INFO Connected all trees +gpub056:538247:538322 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub056:538247:538322 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub056:538247:538322 [1] NCCL INFO comm 0x11edd1a0 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:2868424:2868424 [3] NCCL INFO cudaDriverVersion 12020 +gpub052:2868424:2868424 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2868424:2868424 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2868424:2868497 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> 
+gpub052:2868424:2868497 [3] NCCL INFO Using network IB +gpub052:2868424:2868497 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:2868424:2868497 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub052:2868424:2868497 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub052:2868424:2868497 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub052:2868424:2868497 [3] NCCL INFO Connected all rings +gpub052:2868424:2868497 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub052:2868424:2868497 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub052:2868424:2868497 [3] NCCL INFO Connected all trees +gpub052:2868424:2868497 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub052:2868424:2868497 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2868424:2868497 [3] NCCL INFO comm 0x127ac190 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub056:538249:538249 [3] NCCL INFO cudaDriverVersion 12020 +gpub056:538249:538249 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:538249:538249 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:538249:538323 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.156<0> +gpub056:538249:538323 [3] NCCL INFO Using network IB +gpub056:538249:538323 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub056:538249:538323 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub056:538249:538323 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/IB/0 +gpub056:538249:538323 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/IB/0 +gpub056:538249:538323 [3] NCCL INFO Connected all rings +gpub056:538249:538323 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub056:538249:538323 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub056:538249:538323 [3] NCCL 
INFO Connected all trees +gpub056:538249:538323 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub056:538249:538323 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub056:538249:538323 [3] NCCL INFO comm 0x1523bce0 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub055:132138:132138 [3] NCCL INFO cudaDriverVersion 12020 +gpub055:132138:132138 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.155<0> +gpub055:132138:132138 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub055:132138:132200 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.155<0> +gpub055:132138:132200 [3] NCCL INFO Using network IB +gpub055:132138:132200 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub055:132138:132200 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub055:132138:132200 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub055:132138:132200 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub055:132138:132200 [3] NCCL INFO Connected all rings +gpub055:132138:132200 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub055:132138:132200 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub055:132138:132200 [3] NCCL INFO Connected all trees +gpub055:132138:132200 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub055:132138:132200 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub055:132138:132200 [3] NCCL INFO comm 0x751a62a0 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub055:132136:132136 [1] NCCL INFO cudaDriverVersion 12020 +gpub055:132136:132136 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.155<0> +gpub055:132136:132136 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub055:132136:132199 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.155<0> 
+gpub055:132136:132199 [1] NCCL INFO Using network IB +gpub055:132136:132199 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub055:132136:132199 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub055:132136:132199 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub055:132136:132199 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub055:132136:132199 [1] NCCL INFO Connected all rings +gpub055:132136:132199 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub055:132136:132199 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub055:132136:132199 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub055:132136:132199 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub055:132136:132199 [1] NCCL INFO Connected all trees +gpub055:132136:132199 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub055:132136:132199 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub055:132136:132199 [1] NCCL INFO comm 0x17d720d0 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub055:132135:132135 [0] NCCL INFO cudaDriverVersion 12020 +gpub055:132135:132135 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.155<0> +gpub055:132135:132135 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub055:132135:132197 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.155<0> +gpub055:132135:132197 [0] NCCL INFO Using network IB +gpub055:132135:132197 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub055:132135:132197 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub055:132135:132197 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC 
+gpub055:132135:132197 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub055:132135:132197 [0] NCCL INFO Connected all rings +gpub055:132135:132197 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub055:132135:132197 [0] NCCL INFO Connected all trees +gpub055:132135:132197 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub055:132135:132197 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub055:132135:132197 [0] NCCL INFO comm 0x15efd860 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub056:538248:538248 [2] NCCL INFO cudaDriverVersion 12020 +gpub056:538248:538248 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:538248:538248 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:538248:538324 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.156<0> +gpub056:538248:538324 [2] NCCL INFO Using network IB +gpub056:538248:538324 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub056:538248:538324 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub056:538248:538324 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub056:538248:538324 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub056:538248:538324 [2] NCCL INFO Connected all rings +gpub056:538248:538324 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub056:538248:538324 [2] NCCL INFO Channel 01/0 : 14[85000] -> 
13[46000] via P2P/IPC +gpub056:538248:538324 [2] NCCL INFO Connected all trees +gpub056:538248:538324 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub056:538248:538324 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub056:538248:538324 [2] NCCL INFO comm 0x157385a0 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub052:2868421:2868496 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2868421:2868496 [0] NCCL INFO Using network IB +gpub052:2868421:2868496 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub052:2868421:2868496 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub052:2868421:2868496 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub052:2868421:2868496 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub052:2868421:2868496 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub052:2868421:2868496 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub052:2868421:2868496 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub052:2868421:2868496 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub052:2868421:2868496 [0] NCCL INFO Connected all rings +gpub052:2868421:2868496 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub052:2868421:2868496 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/IB/0 +gpub052:2868421:2868496 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/IB/0 +gpub052:2868421:2868496 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub052:2868421:2868496 [0] NCCL INFO Connected all trees +gpub052:2868421:2868496 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub052:2868421:2868496 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2868421:2868496 [0] NCCL INFO comm 0x16b58ec0 rank 0 nranks 16 
cudaDev 0 busId 7000 - Init COMPLETE +gpub055:132137:132137 [2] NCCL INFO cudaDriverVersion 12020 +gpub055:132137:132137 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.155<0> +gpub055:132137:132137 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub055:132137:132198 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.155<0> +gpub055:132137:132198 [2] NCCL INFO Using network IB +gpub055:132137:132198 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub055:132137:132198 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub055:132137:132198 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub055:132137:132198 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub055:132137:132198 [2] NCCL INFO Connected all rings +gpub055:132137:132198 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub055:132137:132198 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub055:132137:132198 [2] NCCL INFO Connected all trees +gpub055:132137:132198 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub055:132137:132198 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub055:132137:132198 [2] NCCL INFO comm 0x1abfe340 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub056:538246:538246 [0] NCCL INFO cudaDriverVersion 12020 +gpub056:538246:538246 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:538246:538246 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:538246:538321 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.156<0> +gpub056:538246:538321 [0] NCCL INFO Using network IB +gpub056:538246:538321 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub056:538246:538321 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub056:538246:538321 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] 
[receive] via NET/IB/0 +gpub056:538246:538321 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub056:538246:538321 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub056:538246:538321 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub056:538246:538321 [0] NCCL INFO Connected all rings +gpub056:538246:538321 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub056:538246:538321 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub056:538246:538321 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub056:538246:538321 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub056:538246:538321 [0] NCCL INFO Connected all trees +gpub056:538246:538321 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub056:538246:538321 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub056:538246:538321 [0] NCCL INFO comm 0x15674980 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub052:2868423:2868423 [2] NCCL INFO cudaDriverVersion 12020 +gpub052:2868423:2868423 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2868423:2868423 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2868423:2868494 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2868423:2868494 [2] NCCL INFO Using network IB +gpub052:2868423:2868494 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub052:2868423:2868494 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub052:2868423:2868494 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub052:2868423:2868494 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub052:2868423:2868494 [2] NCCL INFO Connected all rings +gpub052:2868423:2868494 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub052:2868423:2868494 [2] NCCL 
INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub052:2868423:2868494 [2] NCCL INFO Connected all trees +gpub052:2868423:2868494 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub052:2868423:2868494 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2868423:2868494 [2] NCCL INFO comm 0x181269b0 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub054:3241515:3241515 [2] NCCL INFO cudaDriverVersion 12020 +gpub054:3241515:3241515 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3241515:3241515 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3241515:3241576 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3241515:3241576 [2] NCCL INFO Using network IB +gpub054:3241515:3241576 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub054:3241515:3241576 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub054:3241515:3241576 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub054:3241515:3241576 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub054:3241515:3241576 [2] NCCL INFO Connected all rings +gpub054:3241515:3241576 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub054:3241515:3241576 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub054:3241515:3241576 [2] NCCL INFO Connected all trees +gpub054:3241515:3241576 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub054:3241515:3241576 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3241515:3241576 [2] NCCL INFO comm 0x10a74200 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub054:3241514:3241514 [1] NCCL INFO cudaDriverVersion 12020 +gpub054:3241514:3241514 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3241514:3241514 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpub054:3241514:3241577 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3241514:3241577 [1] NCCL INFO Using network IB +gpub054:3241514:3241577 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub054:3241514:3241577 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub054:3241514:3241577 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub054:3241514:3241577 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub054:3241514:3241577 [1] NCCL INFO Connected all rings +gpub054:3241514:3241577 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub054:3241514:3241577 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub054:3241514:3241577 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub054:3241514:3241577 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub054:3241514:3241577 [1] NCCL INFO Connected all trees +gpub054:3241514:3241577 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub054:3241514:3241577 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3241514:3241577 [1] NCCL INFO comm 0x15ce2950 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub054:3241516:3241516 [3] NCCL INFO cudaDriverVersion 12020 +gpub054:3241516:3241516 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3241516:3241516 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3241516:3241575 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3241516:3241575 [3] NCCL INFO Using network IB +gpub054:3241516:3241575 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub054:3241516:3241575 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub054:3241516:3241575 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub054:3241516:3241575 [3] NCCL INFO Channel 01/0 : 7[c7000] 
-> 8[7000] [send] via NET/IB/0 +gpub054:3241516:3241575 [3] NCCL INFO Connected all rings +gpub054:3241516:3241575 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub054:3241516:3241575 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub054:3241516:3241575 [3] NCCL INFO Connected all trees +gpub054:3241516:3241575 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub054:3241516:3241575 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3241516:3241575 [3] NCCL INFO comm 0x14911a10 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub054:3241513:3241513 [0] NCCL INFO cudaDriverVersion 12020 +gpub054:3241513:3241513 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3241513:3241513 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3241513:3241574 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3241513:3241574 [0] NCCL INFO Using network IB +gpub054:3241513:3241574 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub054:3241513:3241574 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub054:3241513:3241574 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub054:3241513:3241574 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub054:3241513:3241574 [0] NCCL INFO Connected all rings +gpub054:3241513:3241574 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via 
NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub054:3241513:3241574 [0] NCCL INFO Connected all trees +gpub054:3241513:3241574 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub054:3241513:3241574 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3241513:3241574 [0] NCCL INFO comm 0x129ab3a0 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +[gpub052:0/16] 2024-01-22 06:21:48,296 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. +[gpub052:0/16] 2024-01-22 06:27:01,567 (trainer:737) INFO: 3epoch:train:1-100batch: iter_time=6.494, forward_time=0.636, loss_ctc=164.980, loss_att=197.734, acc=0.226, loss=187.908, backward_time=0.471, grad_norm=67.384, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.107, optim0_lr0=5.077e-05, train_time=12.857 +[gpub052:0/16] 2024-01-22 06:33:41,819 (trainer:737) INFO: 3epoch:train:101-200batch: iter_time=1.010e-04, forward_time=0.642, loss_ctc=150.521, loss_att=196.377, acc=0.231, loss=182.620, backward_time=0.751, grad_norm=55.752, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.129, optim0_lr0=5.227e-05, train_time=4.049 +[gpub052:0/16] 2024-01-22 06:42:28,676 (trainer:737) INFO: 3epoch:train:201-300batch: iter_time=9.928e-05, forward_time=0.314, loss_ctc=156.589, loss_att=201.152, acc=0.223, loss=187.783, backward_time=0.417, grad_norm=60.450, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.100, optim0_lr0=5.377e-05, train_time=5.269 +[gpub052:0/16] 2024-01-22 06:51:06,256 (trainer:737) INFO: 3epoch:train:301-400batch: iter_time=1.048e-04, forward_time=0.460, loss_ctc=160.480, loss_att=204.048, acc=0.224, loss=190.978, backward_time=0.546, grad_norm=55.267, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.107, optim0_lr0=5.527e-05, train_time=5.174 +[gpub052:0/16] 2024-01-22 
06:59:11,131 (trainer:737) INFO: 3epoch:train:401-500batch: iter_time=1.028e-04, forward_time=0.401, loss_ctc=167.439, loss_att=202.769, acc=0.228, loss=192.170, backward_time=0.400, grad_norm=70.942, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.097, optim0_lr0=5.677e-05, train_time=4.850 +[gpub052:0/16] 2024-01-22 07:06:51,276 (trainer:737) INFO: 3epoch:train:501-600batch: iter_time=0.001, forward_time=0.552, loss_ctc=150.975, loss_att=192.250, acc=0.226, loss=179.868, backward_time=0.513, grad_norm=62.260, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.105, optim0_lr0=5.827e-05, train_time=4.601 +[gpub052:0/16] 2024-01-22 07:15:37,430 (trainer:737) INFO: 3epoch:train:601-700batch: iter_time=9.891e-05, forward_time=0.298, loss_ctc=151.548, loss_att=194.596, acc=0.231, loss=181.681, backward_time=0.397, grad_norm=54.993, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.093, optim0_lr0=5.977e-05, train_time=5.261 +[gpub052:0/16] 2024-01-22 07:24:22,360 (trainer:737) INFO: 3epoch:train:701-800batch: iter_time=1.041e-04, forward_time=0.537, loss_ctc=185.442, loss_att=214.856, acc=0.220, loss=206.032, backward_time=0.482, grad_norm=79.463, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.111, optim0_lr0=6.127e-05, train_time=5.250 +[gpub052:0/16] 2024-01-22 07:32:30,100 (trainer:737) INFO: 3epoch:train:801-900batch: iter_time=1.008e-04, forward_time=0.302, loss_ctc=155.284, loss_att=210.979, acc=0.223, loss=194.271, backward_time=0.412, grad_norm=52.703, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.093, optim0_lr0=6.277e-05, train_time=4.876 +[gpub052:0/16] 2024-01-22 07:40:51,113 (trainer:737) INFO: 3epoch:train:901-1000batch: iter_time=9.960e-05, forward_time=0.306, loss_ctc=154.244, loss_att=209.748, acc=0.225, loss=193.097, backward_time=0.409, grad_norm=59.505, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.094, optim0_lr0=6.427e-05, train_time=5.010 +[gpub052:0/16] 2024-01-22 07:49:21,343 (trainer:737) INFO: 
3epoch:train:1001-1100batch: iter_time=0.002, forward_time=0.438, loss_ctc=148.678, loss_att=177.528, acc=0.237, loss=168.873, backward_time=0.430, grad_norm=64.546, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.103, optim0_lr0=6.577e-05, train_time=5.103 +[gpub052:0/16] 2024-01-22 07:58:13,809 (trainer:737) INFO: 3epoch:train:1101-1200batch: iter_time=9.354e-05, forward_time=0.471, loss_ctc=161.262, loss_att=208.027, acc=0.222, loss=193.998, backward_time=0.493, grad_norm=56.999, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.110, optim0_lr0=6.727e-05, train_time=5.325 +[gpub052:0/16] 2024-01-22 08:01:02,472 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub052:0/16] 2024-01-22 08:01:22,116 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 08:01:25,612 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 08:01:25,612 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub052:0/16] 2024-01-22 08:01:25,753 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 08:15:18,540 (trainer:737) INFO: 3epoch:train:1201-1300batch: iter_time=7.221, forward_time=0.585, loss_ctc=145.304, loss_att=176.806, acc=0.244, loss=167.355, backward_time=0.568, grad_norm=68.496, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.127, optim0_lr0=6.877e-05, train_time=10.247 +[gpub052:0/16] 
2024-01-22 08:18:40,992 (trainer:737) INFO: 3epoch:train:1301-1400batch: iter_time=8.559e-05, forward_time=0.402, loss_ctc=169.056, loss_att=209.661, acc=0.218, loss=197.480, backward_time=0.465, grad_norm=71.123, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.100, optim0_lr0=7.027e-05, train_time=2.019 +[gpub052:0/16] 2024-01-22 08:23:15,208 (trainer:737) INFO: 3epoch:train:1401-1500batch: iter_time=0.001, forward_time=0.420, loss_ctc=153.125, loss_att=207.059, acc=0.232, loss=190.879, backward_time=0.514, grad_norm=56.455, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=7.177e-05, train_time=2.747 +[gpub052:0/16] 2024-01-22 08:25:11,247 (trainer:737) INFO: 3epoch:train:1501-1600batch: iter_time=8.523e-05, forward_time=0.303, loss_ctc=139.006, loss_att=175.714, acc=0.227, loss=164.702, backward_time=0.409, grad_norm=52.574, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.094, optim0_lr0=7.327e-05, train_time=1.159 +[gpub052:0/16] 2024-01-22 08:31:13,111 (trainer:737) INFO: 3epoch:train:1601-1700batch: iter_time=8.218e-05, forward_time=0.488, loss_ctc=173.144, loss_att=230.888, acc=0.226, loss=213.565, backward_time=0.446, grad_norm=71.737, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.119, optim0_lr0=7.477e-05, train_time=3.618 +[gpub052:0/16] 2024-01-22 08:36:01,687 (trainer:737) INFO: 3epoch:train:1701-1800batch: iter_time=8.366e-05, forward_time=0.612, loss_ctc=145.179, loss_att=186.708, acc=0.227, loss=174.250, backward_time=0.577, grad_norm=59.156, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=7.627e-05, train_time=2.886 +[gpub052:0/16] 2024-01-22 08:39:21,293 (trainer:737) INFO: 3epoch:train:1801-1900batch: iter_time=8.639e-05, forward_time=0.308, loss_ctc=142.561, loss_att=184.296, acc=0.234, loss=171.775, backward_time=0.399, grad_norm=55.681, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.094, optim0_lr0=7.777e-05, train_time=1.996 +[gpub052:0/16] 2024-01-22 08:42:24,519 
(trainer:737) INFO: 3epoch:train:1901-2000batch: iter_time=8.347e-05, forward_time=0.483, loss_ctc=160.533, loss_att=203.524, acc=0.223, loss=190.627, backward_time=0.480, grad_norm=61.743, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=7.927e-05, train_time=1.832 +[gpub052:0/16] 2024-01-22 08:46:02,235 (trainer:737) INFO: 3epoch:train:2001-2100batch: iter_time=2.982e-04, forward_time=0.339, loss_ctc=177.157, loss_att=227.191, acc=0.222, loss=212.181, backward_time=0.420, grad_norm=68.869, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.094, optim0_lr0=8.077e-05, train_time=2.177 +[gpub052:0/16] 2024-01-22 08:49:45,585 (trainer:737) INFO: 3epoch:train:2101-2200batch: iter_time=8.126e-05, forward_time=0.311, loss_ctc=131.064, loss_att=188.862, acc=0.230, loss=171.522, backward_time=0.406, grad_norm=47.072, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.095, optim0_lr0=8.227e-05, train_time=2.234 +[gpub052:0/16] 2024-01-22 08:53:25,192 (trainer:737) INFO: 3epoch:train:2201-2300batch: iter_time=5.779e-04, forward_time=0.710, loss_ctc=167.004, loss_att=210.465, acc=0.231, loss=197.427, backward_time=0.517, grad_norm=69.648, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=8.377e-05, train_time=2.195 +[gpub052:0/16] 2024-01-22 08:55:56,045 (trainer:737) INFO: 3epoch:train:2301-2400batch: iter_time=8.277e-05, forward_time=0.291, loss_ctc=134.715, loss_att=176.637, acc=0.231, loss=164.060, backward_time=0.401, grad_norm=50.295, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.093, optim0_lr0=8.527e-05, train_time=1.508 +[gpub052:0/16] 2024-01-22 08:58:39,210 (trainer:737) INFO: 3epoch:train:2401-2500batch: iter_time=8.283e-05, forward_time=0.352, loss_ctc=157.680, loss_att=204.418, acc=0.232, loss=190.397, backward_time=0.483, grad_norm=62.509, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.097, optim0_lr0=8.677e-05, train_time=1.632 +[gpub052:0/16] 2024-01-22 08:58:59,653 (multiple_iter_factory:32) 
INFO: Building 2th iter-factory... +[gpub052:0/16] 2024-01-22 08:59:27,378 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 08:59:35,647 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 08:59:35,647 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub052:0/16] 2024-01-22 08:59:35,650 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 09:15:47,093 (trainer:737) INFO: 3epoch:train:2501-2600batch: iter_time=7.486, forward_time=0.565, loss_ctc=154.247, loss_att=194.634, acc=0.229, loss=182.518, backward_time=0.445, grad_norm=66.285, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.100, optim0_lr0=8.827e-05, train_time=10.279 +[gpub052:0/16] 2024-01-22 09:18:59,032 (trainer:737) INFO: 3epoch:train:2601-2700batch: iter_time=8.319e-05, forward_time=0.302, loss_ctc=140.913, loss_att=195.390, acc=0.233, loss=179.047, backward_time=0.405, grad_norm=58.382, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.093, optim0_lr0=8.977e-05, train_time=1.918 +[gpub052:0/16] 2024-01-22 09:22:19,384 (trainer:737) INFO: 3epoch:train:2701-2800batch: iter_time=8.127e-05, forward_time=0.346, loss_ctc=144.933, loss_att=196.660, acc=0.228, loss=181.142, backward_time=0.428, grad_norm=59.116, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.106, optim0_lr0=9.127e-05, train_time=1.995 +[gpub052:0/16] 
2024-01-22 09:26:47,946 (trainer:737) INFO: 3epoch:train:2801-2900batch: iter_time=0.008, forward_time=0.682, loss_ctc=147.272, loss_att=200.281, acc=0.230, loss=184.378, backward_time=0.567, grad_norm=52.701, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.130, optim0_lr0=9.277e-05, train_time=2.695 +[gpub052:0/16] 2024-01-22 09:29:44,987 (trainer:737) INFO: 3epoch:train:2901-3000batch: iter_time=8.146e-05, forward_time=0.290, loss_ctc=153.016, loss_att=199.874, acc=0.229, loss=185.816, backward_time=0.400, grad_norm=66.976, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.093, optim0_lr0=9.427e-05, train_time=1.770 +[gpub052:0/16] 2024-01-22 09:32:18,202 (trainer:737) INFO: 3epoch:train:3001-3100batch: iter_time=7.928e-05, forward_time=0.306, loss_ctc=139.379, loss_att=189.704, acc=0.229, loss=174.607, backward_time=0.411, grad_norm=54.976, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.094, optim0_lr0=9.577e-05, train_time=1.531 +[gpub052:0/16] 2024-01-22 09:35:34,343 (trainer:737) INFO: 3epoch:train:3101-3200batch: iter_time=8.374e-05, forward_time=0.498, loss_ctc=138.664, loss_att=191.404, acc=0.235, loss=175.582, backward_time=0.555, grad_norm=47.746, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=9.727e-05, train_time=1.961 +[gpub052:0/16] 2024-01-22 09:38:01,704 (trainer:737) INFO: 3epoch:train:3201-3300batch: iter_time=8.584e-05, forward_time=0.334, loss_ctc=169.685, loss_att=208.269, acc=0.224, loss=196.694, backward_time=0.420, grad_norm=71.885, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.105, optim0_lr0=9.877e-05, train_time=1.475 +[gpub052:0/16] 2024-01-22 09:41:07,046 (trainer:737) INFO: 3epoch:train:3301-3400batch: iter_time=8.240e-05, forward_time=0.349, loss_ctc=140.957, loss_att=207.466, acc=0.226, loss=187.513, backward_time=0.406, grad_norm=50.400, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.096, optim0_lr0=1.003e-04, train_time=1.853 +[gpub052:0/16] 2024-01-22 09:43:56,596 
(trainer:737) INFO: 3epoch:train:3401-3500batch: iter_time=8.311e-05, forward_time=0.394, loss_ctc=140.608, loss_att=205.674, acc=0.230, loss=186.155, backward_time=0.478, grad_norm=55.737, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.101, optim0_lr0=1.018e-04, train_time=1.695 +[gpub052:0/16] 2024-01-22 09:46:19,935 (trainer:737) INFO: 3epoch:train:3501-3600batch: iter_time=8.199e-05, forward_time=0.420, loss_ctc=137.426, loss_att=175.183, acc=0.239, loss=163.856, backward_time=0.431, grad_norm=60.795, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.106, optim0_lr0=1.033e-04, train_time=1.434 +[gpub052:0/16] 2024-01-22 09:48:44,130 (trainer:737) INFO: 3epoch:train:3601-3700batch: iter_time=7.406e-04, forward_time=0.303, loss_ctc=149.556, loss_att=204.519, acc=0.227, loss=188.030, backward_time=0.409, grad_norm=54.432, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.094, optim0_lr0=1.048e-04, train_time=1.442 +[gpub052:0/16] 2024-01-22 09:50:37,660 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub052:0/16] 2024-01-22 09:50:57,361 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 09:51:00,886 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 09:51:00,886 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub052:0/16] 2024-01-22 09:51:00,940 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 10:05:09,069 (trainer:737) INFO: 3epoch:train:3701-3800batch: iter_time=7.668, forward_time=0.599, loss_ctc=132.951, loss_att=171.333, acc=0.249, loss=159.818, backward_time=0.458, grad_norm=58.118, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.098, optim0_lr0=1.063e-04, train_time=9.849 +[gpub052:0/16] 2024-01-22 10:08:26,145 (trainer:737) INFO: 3epoch:train:3801-3900batch: iter_time=8.715e-05, forward_time=0.378, loss_ctc=156.236, loss_att=201.892, acc=0.223, loss=188.195, backward_time=0.424, grad_norm=65.020, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.097, optim0_lr0=1.078e-04, train_time=1.971 +[gpub052:0/16] 2024-01-22 10:10:44,942 (trainer:737) INFO: 3epoch:train:3901-4000batch: iter_time=7.922e-05, forward_time=0.311, loss_ctc=140.426, loss_att=200.512, acc=0.235, loss=182.486, backward_time=0.407, grad_norm=56.636, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.093, optim0_lr0=1.093e-04, train_time=1.388 +[gpub052:0/16] 2024-01-22 10:15:28,493 (trainer:737) 
INFO: 3epoch:train:4001-4100batch: iter_time=9.056e-05, forward_time=0.580, loss_ctc=128.725, loss_att=168.593, acc=0.232, loss=156.632, backward_time=0.569, grad_norm=54.399, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.118, optim0_lr0=1.108e-04, train_time=2.835 +[gpub052:0/16] 2024-01-22 10:17:57,560 (trainer:737) INFO: 3epoch:train:4101-4200batch: iter_time=8.383e-05, forward_time=0.295, loss_ctc=157.820, loss_att=221.617, acc=0.228, loss=202.478, backward_time=0.413, grad_norm=63.832, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.095, optim0_lr0=1.123e-04, train_time=1.491 +[gpub052:0/16] 2024-01-22 10:22:16,660 (trainer:737) INFO: 3epoch:train:4201-4300batch: iter_time=8.587e-05, forward_time=0.552, loss_ctc=133.745, loss_att=180.381, acc=0.235, loss=166.390, backward_time=0.536, grad_norm=55.128, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.117, optim0_lr0=1.138e-04, train_time=2.591 +[gpub052:0/16] 2024-01-22 10:24:37,525 (trainer:737) INFO: 3epoch:train:4301-4400batch: iter_time=3.150e-04, forward_time=0.367, loss_ctc=131.485, loss_att=178.843, acc=0.238, loss=164.636, backward_time=0.452, grad_norm=51.009, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.106, optim0_lr0=1.153e-04, train_time=1.409 +[gpub052:0/16] 2024-01-22 10:28:14,650 (trainer:737) INFO: 3epoch:train:4401-4500batch: iter_time=8.056e-05, forward_time=0.311, loss_ctc=147.533, loss_att=197.604, acc=0.226, loss=182.582, backward_time=0.404, grad_norm=58.995, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.094, optim0_lr0=1.168e-04, train_time=2.171 +[gpub052:0/16] 2024-01-22 10:31:29,485 (trainer:737) INFO: 3epoch:train:4501-4600batch: iter_time=8.017e-05, forward_time=0.544, loss_ctc=162.988, loss_att=220.192, acc=0.228, loss=203.031, backward_time=0.483, grad_norm=66.509, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.111, optim0_lr0=1.183e-04, train_time=1.947 +[gpub052:0/16] 2024-01-22 10:34:33,158 (trainer:737) INFO: 
3epoch:train:4601-4700batch: iter_time=8.218e-05, forward_time=0.352, loss_ctc=119.832, loss_att=183.299, acc=0.238, loss=164.259, backward_time=0.412, grad_norm=45.915, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.095, optim0_lr0=1.198e-04, train_time=1.837 +[gpub052:0/16] 2024-01-22 10:38:17,264 (trainer:737) INFO: 3epoch:train:4701-4800batch: iter_time=8.292e-05, forward_time=0.312, loss_ctc=150.181, loss_att=203.621, acc=0.239, loss=187.589, backward_time=0.422, grad_norm=67.959, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.095, optim0_lr0=1.213e-04, train_time=2.241 +[gpub052:0/16] 2024-01-22 10:41:17,007 (trainer:737) INFO: 3epoch:train:4801-4900batch: iter_time=0.001, forward_time=0.579, loss_ctc=122.872, loss_att=172.391, acc=0.235, loss=157.535, backward_time=0.490, grad_norm=46.840, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.110, optim0_lr0=1.228e-04, train_time=1.796 +[gpub052:0/16] 2024-01-22 10:45:07,890 (trainer:737) INFO: 3epoch:train:4901-5000batch: iter_time=8.311e-05, forward_time=0.291, loss_ctc=143.348, loss_att=197.518, acc=0.239, loss=181.267, backward_time=0.401, grad_norm=57.751, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.093, optim0_lr0=1.243e-04, train_time=2.309 +[gpub052:0/16] 2024-01-22 10:45:28,460 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub052:0/16] 2024-01-22 10:45:47,895 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 10:45:51,473 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 10:45:51,473 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub052:0/16] 2024-01-22 10:45:51,627 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 11:00:39,624 (trainer:737) INFO: 3epoch:train:5001-5100batch: iter_time=7.483, forward_time=0.520, loss_ctc=142.672, loss_att=186.315, acc=0.235, loss=173.222, backward_time=0.467, grad_norm=68.777, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.100, optim0_lr0=1.258e-04, train_time=9.318 +[gpub052:0/16] 2024-01-22 11:03:23,942 (trainer:737) INFO: 3epoch:train:5101-5200batch: iter_time=8.039e-05, forward_time=0.402, loss_ctc=129.485, loss_att=185.561, acc=0.241, loss=168.738, backward_time=0.468, grad_norm=53.740, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.098, optim0_lr0=1.273e-04, train_time=1.643 +[gpub052:0/16] 2024-01-22 11:07:01,565 (trainer:737) INFO: 3epoch:train:5201-5300batch: iter_time=9.143e-05, forward_time=0.294, loss_ctc=133.725, loss_att=189.988, acc=0.233, loss=173.109, backward_time=0.398, grad_norm=56.950, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.093, optim0_lr0=1.288e-04, train_time=2.176 +[gpub052:0/16] 2024-01-22 11:09:42,971 (trainer:737) 
INFO: 3epoch:train:5301-5400batch: iter_time=9.245e-05, forward_time=0.492, loss_ctc=134.919, loss_att=189.130, acc=0.236, loss=172.867, backward_time=0.493, grad_norm=51.759, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.115, optim0_lr0=1.303e-04, train_time=1.614 +[gpub052:0/16] 2024-01-22 11:12:43,536 (trainer:737) INFO: 3epoch:train:5401-5500batch: iter_time=8.873e-05, forward_time=0.347, loss_ctc=141.931, loss_att=189.810, acc=0.240, loss=175.446, backward_time=0.444, grad_norm=61.576, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.101, optim0_lr0=1.318e-04, train_time=1.806 +[gpub052:0/16] 2024-01-22 11:15:50,113 (trainer:737) INFO: 3epoch:train:5501-5600batch: iter_time=8.348e-05, forward_time=0.350, loss_ctc=128.262, loss_att=181.028, acc=0.237, loss=165.198, backward_time=0.417, grad_norm=53.868, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.096, optim0_lr0=1.333e-04, train_time=1.863 +[gpub052:0/16] 2024-01-22 11:18:29,457 (trainer:737) INFO: 3epoch:train:5601-5700batch: iter_time=8.615e-05, forward_time=0.308, loss_ctc=127.859, loss_att=186.418, acc=0.241, loss=168.850, backward_time=0.405, grad_norm=46.801, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.094, optim0_lr0=1.348e-04, train_time=1.596 +[gpub052:0/16] 2024-01-22 11:21:43,079 (trainer:737) INFO: 3epoch:train:5701-5800batch: iter_time=8.943e-05, forward_time=0.573, loss_ctc=155.028, loss_att=201.503, acc=0.232, loss=187.561, backward_time=0.500, grad_norm=64.143, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.111, optim0_lr0=1.363e-04, train_time=1.936 +[gpub052:0/16] 2024-01-22 11:24:20,335 (trainer:737) INFO: 3epoch:train:5801-5900batch: iter_time=0.002, forward_time=0.321, loss_ctc=129.999, loss_att=200.340, acc=0.237, loss=179.238, backward_time=0.425, grad_norm=55.681, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.096, optim0_lr0=1.378e-04, train_time=1.573 +[gpub052:0/16] 2024-01-22 11:27:11,640 (trainer:737) INFO: 3epoch:train:5901-6000batch: 
iter_time=8.782e-05, forward_time=0.305, loss_ctc=130.360, loss_att=197.616, acc=0.240, loss=177.439, backward_time=0.404, grad_norm=57.867, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.094, optim0_lr0=1.393e-04, train_time=1.709 +[gpub052:0/16] 2024-01-22 11:30:07,891 (trainer:737) INFO: 3epoch:train:6001-6100batch: iter_time=8.813e-05, forward_time=0.477, loss_ctc=128.133, loss_att=171.484, acc=0.250, loss=158.479, backward_time=0.496, grad_norm=52.130, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.102, optim0_lr0=1.408e-04, train_time=1.766 +[gpub052:0/16] 2024-01-22 11:32:50,445 (trainer:737) INFO: 3epoch:train:6101-6200batch: iter_time=5.408e-04, forward_time=0.330, loss_ctc=140.445, loss_att=196.492, acc=0.237, loss=179.678, backward_time=0.430, grad_norm=54.500, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.096, optim0_lr0=1.423e-04, train_time=1.625 +[gpub052:0/16] 2024-01-22 11:34:05,725 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub052:0/16] 2024-01-22 11:34:25,860 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 11:34:29,505 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 11:34:29,505 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub052:0/16] 2024-01-22 11:34:29,509 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 
11:45:06,268 (trainer:737) INFO: 3epoch:train:6201-6300batch: iter_time=6.035, forward_time=0.427, loss_ctc=121.718, loss_att=165.145, acc=0.256, loss=152.117, backward_time=0.430, grad_norm=51.980, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.099, optim0_lr0=1.438e-04, train_time=7.358 +[gpub052:0/16] 2024-01-22 11:47:28,870 (trainer:737) INFO: 3epoch:train:6301-6400batch: iter_time=8.115e-05, forward_time=0.340, loss_ctc=146.462, loss_att=195.552, acc=0.233, loss=180.825, backward_time=0.416, grad_norm=70.060, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.096, optim0_lr0=1.453e-04, train_time=1.426 +[gpub052:0/16] 2024-01-22 11:50:35,541 (trainer:737) INFO: 3epoch:train:6401-6500batch: iter_time=8.088e-05, forward_time=0.487, loss_ctc=131.667, loss_att=194.524, acc=0.248, loss=175.667, backward_time=0.482, grad_norm=53.765, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.112, optim0_lr0=1.468e-04, train_time=1.867 +[gpub052:0/16] 2024-01-22 11:52:33,074 (trainer:737) INFO: 3epoch:train:6501-6600batch: iter_time=8.187e-05, forward_time=0.340, loss_ctc=119.028, loss_att=163.532, acc=0.244, loss=150.181, backward_time=0.417, grad_norm=53.392, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.094, optim0_lr0=1.483e-04, train_time=1.175 +[gpub052:0/16] 2024-01-22 11:55:55,662 (trainer:737) INFO: 3epoch:train:6601-6700batch: iter_time=8.567e-05, forward_time=0.396, loss_ctc=144.101, loss_att=212.186, acc=0.244, loss=191.760, backward_time=0.563, grad_norm=58.817, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.109, optim0_lr0=1.498e-04, train_time=2.026 +[gpub052:0/16] 2024-01-22 11:58:33,487 (trainer:737) INFO: 3epoch:train:6701-6800batch: iter_time=9.669e-05, forward_time=0.328, loss_ctc=123.910, loss_att=176.350, acc=0.248, loss=160.618, backward_time=0.415, grad_norm=51.722, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.095, optim0_lr0=1.513e-04, train_time=1.578 +[gpub052:0/16] 2024-01-22 12:01:09,804 (trainer:737) INFO: 
3epoch:train:6801-6900batch: iter_time=8.000e-05, forward_time=0.465, loss_ctc=119.628, loss_att=173.371, acc=0.253, loss=157.248, backward_time=0.456, grad_norm=48.179, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.103, optim0_lr0=1.528e-04, train_time=1.562 +[gpub052:0/16] 2024-01-22 12:03:33,982 (trainer:737) INFO: 3epoch:train:6901-7000batch: iter_time=3.146e-04, forward_time=0.331, loss_ctc=134.801, loss_att=190.981, acc=0.241, loss=174.127, backward_time=0.413, grad_norm=52.236, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.100, optim0_lr0=1.543e-04, train_time=1.442 +[gpub052:0/16] 2024-01-22 12:06:25,646 (trainer:737) INFO: 3epoch:train:7001-7100batch: iter_time=8.525e-05, forward_time=0.401, loss_ctc=148.449, loss_att=211.927, acc=0.242, loss=192.883, backward_time=0.486, grad_norm=62.208, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.106, optim0_lr0=1.558e-04, train_time=1.717 +[gpub052:0/16] 2024-01-22 12:08:57,819 (trainer:737) INFO: 3epoch:train:7101-7200batch: iter_time=1.019e-04, forward_time=0.327, loss_ctc=110.051, loss_att=175.195, acc=0.259, loss=155.652, backward_time=0.417, grad_norm=42.062, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.096, optim0_lr0=1.573e-04, train_time=1.520 +[gpub052:0/16] 2024-01-22 12:11:33,750 (trainer:737) INFO: 3epoch:train:7201-7300batch: iter_time=8.209e-05, forward_time=0.442, loss_ctc=141.005, loss_att=196.020, acc=0.259, loss=179.516, backward_time=0.473, grad_norm=63.356, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.110, optim0_lr0=1.588e-04, train_time=1.560 +[gpub052:0/16] 2024-01-22 12:14:09,665 (trainer:737) INFO: 3epoch:train:7301-7400batch: iter_time=0.001, forward_time=0.345, loss_ctc=113.672, loss_att=164.812, acc=0.254, loss=149.470, backward_time=0.418, grad_norm=44.589, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.095, optim0_lr0=1.603e-04, train_time=1.558 +[gpub052:0/16] 2024-01-22 12:17:12,759 (trainer:737) INFO: 3epoch:train:7401-7500batch: 
iter_time=7.947e-05, forward_time=0.396, loss_ctc=135.145, loss_att=188.644, acc=0.262, loss=172.595, backward_time=0.522, grad_norm=52.431, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.105, optim0_lr0=1.618e-04, train_time=1.831 +[gpub052:0/16] 2024-01-22 12:17:32,789 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub052:0/16] 2024-01-22 12:17:52,499 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 12:17:56,317 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 12:17:56,317 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub052:0/16] 2024-01-22 12:17:56,320 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 12:30:18,368 (trainer:737) INFO: 3epoch:train:7501-7600batch: iter_time=5.982, forward_time=0.488, loss_ctc=130.015, loss_att=185.913, acc=0.254, loss=169.144, backward_time=0.517, grad_norm=58.030, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.115, optim0_lr0=1.633e-04, train_time=7.857 +[gpub052:0/16] 2024-01-22 12:32:43,596 (trainer:737) INFO: 3epoch:train:7601-7700batch: iter_time=8.201e-05, forward_time=0.346, loss_ctc=119.657, loss_att=184.073, acc=0.262, loss=164.749, backward_time=0.414, grad_norm=50.883, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.095, optim0_lr0=1.648e-04, train_time=1.452 +[gpub052:0/16] 2024-01-22 12:36:30,952 
(trainer:737) INFO: 3epoch:train:7701-7800batch: iter_time=8.248e-05, forward_time=0.521, loss_ctc=123.986, loss_att=185.099, acc=0.259, loss=166.765, backward_time=0.487, grad_norm=54.926, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.102, optim0_lr0=1.663e-04, train_time=2.274 +[gpub052:0/16] 2024-01-22 12:38:59,880 (trainer:737) INFO: 3epoch:train:7801-7900batch: iter_time=0.001, forward_time=0.345, loss_ctc=123.573, loss_att=187.208, acc=0.261, loss=168.118, backward_time=0.418, grad_norm=48.652, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.096, optim0_lr0=1.678e-04, train_time=1.488 +[gpub052:0/16] 2024-01-22 12:42:01,731 (trainer:737) INFO: 3epoch:train:7901-8000batch: iter_time=8.428e-05, forward_time=0.443, loss_ctc=131.214, loss_att=184.994, acc=0.263, loss=168.860, backward_time=0.559, grad_norm=60.744, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.111, optim0_lr0=1.693e-04, train_time=1.820 +[gpub052:0/16] 2024-01-22 12:45:26,641 (trainer:737) INFO: 3epoch:train:8001-8100batch: iter_time=3.782e-04, forward_time=0.373, loss_ctc=117.458, loss_att=175.222, acc=0.262, loss=157.893, backward_time=0.412, grad_norm=44.600, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.093, optim0_lr0=1.708e-04, train_time=2.046 +[gpub052:0/16] 2024-01-22 12:48:17,438 (trainer:737) INFO: 3epoch:train:8101-8200batch: iter_time=8.086e-05, forward_time=0.494, loss_ctc=116.429, loss_att=176.431, acc=0.275, loss=158.431, backward_time=0.475, grad_norm=44.337, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.111, optim0_lr0=1.723e-04, train_time=1.709 +[gpub052:0/16] 2024-01-22 12:51:08,995 (trainer:737) INFO: 3epoch:train:8201-8300batch: iter_time=0.001, forward_time=0.335, loss_ctc=142.590, loss_att=191.926, acc=0.257, loss=177.125, backward_time=0.408, grad_norm=60.014, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.112, optim0_lr0=1.738e-04, train_time=1.715 +[gpub052:0/16] 2024-01-22 12:54:47,469 (trainer:737) INFO: 
3epoch:train:8301-8400batch: iter_time=9.014e-05, forward_time=0.477, loss_ctc=119.624, loss_att=187.519, acc=0.277, loss=167.151, backward_time=0.447, grad_norm=48.772, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.113, optim0_lr0=1.753e-04, train_time=2.186 +[gpub052:0/16] 2024-01-22 12:57:06,140 (trainer:737) INFO: 3epoch:train:8401-8500batch: iter_time=1.970e-04, forward_time=0.335, loss_ctc=119.449, loss_att=184.190, acc=0.284, loss=164.768, backward_time=0.410, grad_norm=55.010, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.096, optim0_lr0=1.768e-04, train_time=1.384 +[gpub052:0/16] 2024-01-22 13:00:05,890 (trainer:737) INFO: 3epoch:train:8501-8600batch: iter_time=8.461e-05, forward_time=0.479, loss_ctc=115.939, loss_att=157.783, acc=0.289, loss=145.230, backward_time=0.474, grad_norm=54.845, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.101, optim0_lr0=1.783e-04, train_time=1.797 +[gpub052:0/16] 2024-01-22 13:03:16,743 (trainer:737) INFO: 3epoch:train:8601-8700batch: iter_time=8.490e-04, forward_time=0.329, loss_ctc=127.795, loss_att=184.594, acc=0.277, loss=167.554, backward_time=0.408, grad_norm=54.428, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.094, optim0_lr0=1.798e-04, train_time=1.909 +[gpub052:0/16] 2024-01-22 13:05:57,370 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub052:0/16] 2024-01-22 13:06:17,191 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 13:06:21,128 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 13:06:21,128 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub052:0/16] 2024-01-22 13:06:21,131 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 13:19:54,986 (trainer:737) INFO: 3epoch:train:8701-8800batch: iter_time=7.718, forward_time=0.491, loss_ctc=112.851, loss_att=156.137, acc=0.294, loss=143.151, backward_time=0.547, grad_norm=51.168, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.103, optim0_lr0=1.813e-04, train_time=9.984 +[gpub052:0/16] 2024-01-22 13:22:33,481 (trainer:737) INFO: 3epoch:train:8801-8900batch: iter_time=8.040e-05, forward_time=0.372, loss_ctc=138.122, loss_att=187.023, acc=0.271, loss=172.353, backward_time=0.416, grad_norm=66.047, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.094, optim0_lr0=1.828e-04, train_time=1.584 +[gpub052:0/16] 2024-01-22 13:26:50,187 (trainer:737) INFO: 3epoch:train:8901-9000batch: iter_time=8.113e-05, forward_time=0.470, loss_ctc=119.848, loss_att=181.943, acc=0.292, loss=163.315, backward_time=0.559, grad_norm=54.108, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.110, optim0_lr0=1.843e-04, train_time=2.568 +[gpub052:0/16] 2024-01-22 13:29:22,591 
(trainer:737) INFO: 3epoch:train:9001-9100batch: iter_time=9.945e-05, forward_time=0.339, loss_ctc=110.222, loss_att=153.105, acc=0.285, loss=140.240, backward_time=0.411, grad_norm=52.979, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.098, optim0_lr0=1.858e-04, train_time=1.524 +[gpub052:0/16] 2024-01-22 13:33:39,112 (trainer:737) INFO: 3epoch:train:9101-9200batch: iter_time=9.256e-05, forward_time=0.958, loss_ctc=135.284, loss_att=199.662, acc=0.291, loss=180.349, backward_time=0.532, grad_norm=56.587, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.148, optim0_lr0=1.873e-04, train_time=2.561 +[gpub052:0/16] 2024-01-22 13:37:26,535 (trainer:737) INFO: 3epoch:train:9201-9300batch: iter_time=1.209e-04, forward_time=0.350, loss_ctc=113.539, loss_att=161.343, acc=0.292, loss=147.002, backward_time=0.409, grad_norm=53.157, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.094, optim0_lr0=1.888e-04, train_time=2.278 +[gpub052:0/16] 2024-01-22 13:40:41,653 (trainer:737) INFO: 3epoch:train:9301-9400batch: iter_time=9.915e-05, forward_time=0.530, loss_ctc=110.543, loss_att=158.745, acc=0.297, loss=144.285, backward_time=0.600, grad_norm=49.916, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.116, optim0_lr0=1.903e-04, train_time=1.950 +[gpub052:0/16] 2024-01-22 13:43:28,439 (trainer:737) INFO: 3epoch:train:9401-9500batch: iter_time=0.001, forward_time=0.339, loss_ctc=126.134, loss_att=177.453, acc=0.286, loss=162.057, backward_time=0.432, grad_norm=52.642, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.099, optim0_lr0=1.918e-04, train_time=1.667 +[gpub052:0/16] 2024-01-22 13:46:57,010 (trainer:737) INFO: 3epoch:train:9501-9600batch: iter_time=9.189e-05, forward_time=0.311, loss_ctc=139.951, loss_att=193.278, acc=0.295, loss=177.280, backward_time=0.408, grad_norm=65.420, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.097, optim0_lr0=1.933e-04, train_time=2.087 +[gpub052:0/16] 2024-01-22 13:50:31,476 (trainer:737) INFO: 
3epoch:train:9601-9700batch: iter_time=9.624e-05, forward_time=0.632, loss_ctc=103.039, loss_att=154.670, acc=0.317, loss=139.180, backward_time=0.541, grad_norm=46.259, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.107, optim0_lr0=1.948e-04, train_time=2.144 +[gpub052:0/16] 2024-01-22 13:53:10,446 (trainer:737) INFO: 3epoch:train:9701-9800batch: iter_time=5.097e-04, forward_time=0.390, loss_ctc=129.038, loss_att=174.298, acc=0.323, loss=160.720, backward_time=0.441, grad_norm=63.136, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.096, optim0_lr0=1.963e-04, train_time=1.588 +[gpub052:0/16] 2024-01-22 13:56:36,443 (trainer:737) INFO: 3epoch:train:9801-9900batch: iter_time=8.932e-05, forward_time=0.330, loss_ctc=106.169, loss_att=148.551, acc=0.304, loss=135.836, backward_time=0.425, grad_norm=49.526, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.096, optim0_lr0=1.978e-04, train_time=2.062 +[gpub052:0/16] 2024-01-22 14:00:56,666 (trainer:737) INFO: 3epoch:train:9901-10000batch: iter_time=8.871e-05, forward_time=0.506, loss_ctc=124.331, loss_att=167.675, acc=0.321, loss=154.672, backward_time=0.599, grad_norm=56.682, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.124, optim0_lr0=1.993e-04, train_time=2.602 +[gpub052:0/16] 2024-01-22 14:01:16,757 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub052:0/16] 2024-01-22 14:01:36,976 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 14:01:40,701 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 14:01:40,701 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub052:0/16] 2024-01-22 14:01:40,704 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 14:19:31,141 (trainer:737) INFO: 3epoch:train:10001-10100batch: iter_time=8.721, forward_time=0.604, loss_ctc=125.234, loss_att=161.338, acc=0.300, loss=150.507, backward_time=0.463, grad_norm=68.037, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.102, optim0_lr0=2.008e-04, train_time=11.145 +[gpub052:0/16] 2024-01-22 14:23:27,469 (trainer:737) INFO: 3epoch:train:10101-10200batch: iter_time=9.558e-05, forward_time=0.552, loss_ctc=112.280, loss_att=159.281, acc=0.315, loss=145.180, backward_time=0.564, grad_norm=55.719, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.109, optim0_lr0=2.023e-04, train_time=2.363 +[gpub052:0/16] 2024-01-22 14:26:48,098 (trainer:737) INFO: 3epoch:train:10201-10300batch: iter_time=9.240e-05, forward_time=0.292, loss_ctc=114.348, loss_att=159.684, acc=0.316, loss=146.083, backward_time=0.400, grad_norm=57.011, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.093, optim0_lr0=2.038e-04, train_time=2.007 +[gpub052:0/16] 2024-01-22 14:30:17,741 
(trainer:737) INFO: 3epoch:train:10301-10400batch: iter_time=8.996e-05, forward_time=0.689, loss_ctc=116.775, loss_att=159.996, acc=0.315, loss=147.030, backward_time=0.513, grad_norm=51.915, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.124, optim0_lr0=2.053e-04, train_time=2.096 +[gpub052:0/16] 2024-01-22 14:33:28,641 (trainer:737) INFO: 3epoch:train:10401-10500batch: iter_time=9.311e-05, forward_time=0.299, loss_ctc=122.506, loss_att=159.783, acc=0.322, loss=148.600, backward_time=0.401, grad_norm=61.199, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.094, optim0_lr0=2.068e-04, train_time=1.908 +[gpub052:0/16] 2024-01-22 14:36:53,875 (trainer:737) INFO: 3epoch:train:10501-10600batch: iter_time=9.394e-05, forward_time=0.606, loss_ctc=110.656, loss_att=151.738, acc=0.315, loss=139.413, backward_time=0.514, grad_norm=50.077, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.128, optim0_lr0=2.083e-04, train_time=2.053 +[gpub052:0/16] 2024-01-22 14:39:13,810 (trainer:737) INFO: 3epoch:train:10601-10700batch: iter_time=8.922e-05, forward_time=0.295, loss_ctc=109.807, loss_att=153.798, acc=0.334, loss=140.600, backward_time=0.404, grad_norm=52.079, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.093, optim0_lr0=2.098e-04, train_time=1.399 +[gpub052:0/16] 2024-01-22 14:43:27,117 (trainer:737) INFO: 3epoch:train:10701-10800batch: iter_time=5.819e-04, forward_time=0.296, loss_ctc=135.371, loss_att=171.852, acc=0.303, loss=160.908, backward_time=0.401, grad_norm=64.471, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.097, optim0_lr0=2.113e-04, train_time=2.532 +[gpub052:0/16] 2024-01-22 14:46:48,368 (trainer:737) INFO: 3epoch:train:10801-10900batch: iter_time=8.713e-05, forward_time=0.613, loss_ctc=113.254, loss_att=156.845, acc=0.351, loss=143.768, backward_time=0.564, grad_norm=55.352, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.119, optim0_lr0=2.128e-04, train_time=2.013 +[gpub052:0/16] 2024-01-22 14:49:52,856 (trainer:737) INFO: 
3epoch:train:10901-11000batch: iter_time=8.710e-05, forward_time=0.345, loss_ctc=110.838, loss_att=152.568, acc=0.356, loss=140.049, backward_time=0.404, grad_norm=56.197, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.093, optim0_lr0=2.143e-04, train_time=1.845 +[gpub052:0/16] 2024-01-22 14:52:55,254 (trainer:737) INFO: 3epoch:train:11001-11100batch: iter_time=8.819e-05, forward_time=0.291, loss_ctc=108.345, loss_att=138.428, acc=0.347, loss=129.403, backward_time=0.399, grad_norm=56.292, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.093, optim0_lr0=2.158e-04, train_time=1.824 +[gpub052:0/16] 2024-01-22 14:56:21,143 (trainer:737) INFO: 3epoch:train:11101-11200batch: iter_time=0.007, forward_time=0.667, loss_ctc=120.528, loss_att=158.275, acc=0.338, loss=146.951, backward_time=0.527, grad_norm=53.128, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.124, optim0_lr0=2.173e-04, train_time=2.058 +[gpub052:0/16] 2024-01-22 14:58:01,940 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub052:0/16] 2024-01-22 14:58:21,856 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 14:58:25,591 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 14:58:25,592 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub052:0/16] 2024-01-22 14:58:25,595 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 15:14:20,721 (trainer:737) INFO: 3epoch:train:11201-11300batch: iter_time=8.698, forward_time=0.291, loss_ctc=105.264, loss_att=138.630, acc=0.352, loss=128.620, backward_time=0.404, grad_norm=50.522, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.094, optim0_lr0=2.188e-04, train_time=10.796 +[gpub052:0/16] 2024-01-22 15:20:25,152 (trainer:737) INFO: 3epoch:train:11301-11400batch: iter_time=8.440e-05, forward_time=0.532, loss_ctc=126.919, loss_att=164.101, acc=0.332, loss=152.946, backward_time=0.625, grad_norm=67.157, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.110, optim0_lr0=2.203e-04, train_time=3.644 +[gpub052:0/16] 2024-01-22 15:23:56,572 (trainer:737) INFO: 3epoch:train:11401-11500batch: iter_time=8.363e-05, forward_time=0.361, loss_ctc=111.997, loss_att=160.913, acc=0.359, loss=146.238, backward_time=0.412, grad_norm=54.079, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.111, optim0_lr0=2.218e-04, train_time=2.114 +[gpub052:0/16] 2024-01-22 15:28:02,674 
(trainer:737) INFO: 3epoch:train:11501-11600batch: iter_time=8.595e-05, forward_time=0.417, loss_ctc=102.355, loss_att=132.186, acc=0.346, loss=123.237, backward_time=0.674, grad_norm=57.067, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.106, optim0_lr0=2.233e-04, train_time=2.461 +[gpub052:0/16] 2024-01-22 15:32:43,923 (trainer:737) INFO: 3epoch:train:11601-11700batch: iter_time=8.474e-05, forward_time=0.506, loss_ctc=127.209, loss_att=175.732, acc=0.362, loss=161.175, backward_time=0.661, grad_norm=63.211, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.120, optim0_lr0=2.248e-04, train_time=2.812 +[gpub052:0/16] 2024-01-22 15:36:53,480 (trainer:737) INFO: 3epoch:train:11701-11800batch: iter_time=4.109e-04, forward_time=0.290, loss_ctc=107.211, loss_att=140.249, acc=0.359, loss=130.338, backward_time=0.398, grad_norm=52.720, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.094, optim0_lr0=2.263e-04, train_time=2.495 +[gpub052:0/16] 2024-01-22 15:40:57,707 (trainer:737) INFO: 3epoch:train:11801-11900batch: iter_time=8.504e-05, forward_time=0.618, loss_ctc=102.616, loss_att=138.587, acc=0.363, loss=127.796, backward_time=0.599, grad_norm=50.760, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.118, optim0_lr0=2.278e-04, train_time=2.441 +[gpub052:0/16] 2024-01-22 15:45:14,829 (trainer:737) INFO: 3epoch:train:11901-12000batch: iter_time=3.796e-04, forward_time=0.302, loss_ctc=118.706, loss_att=158.193, acc=0.345, loss=146.347, backward_time=0.406, grad_norm=55.370, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.095, optim0_lr0=2.293e-04, train_time=2.572 +[gpub052:0/16] 2024-01-22 15:50:23,449 (trainer:737) INFO: 3epoch:train:12001-12100batch: iter_time=8.601e-05, forward_time=0.598, loss_ctc=130.337, loss_att=168.136, acc=0.365, loss=156.796, backward_time=0.604, grad_norm=71.018, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.107, optim0_lr0=2.308e-04, train_time=3.086 +[gpub052:0/16] 2024-01-22 15:53:36,235 (trainer:737) INFO: 
3epoch:train:12101-12200batch: iter_time=2.975e-04, forward_time=0.561, loss_ctc=95.436, loss_att=128.651, acc=0.399, loss=118.686, backward_time=0.668, grad_norm=54.919, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.120, optim0_lr0=2.323e-04, train_time=1.924 +[gpub052:0/16] 2024-01-22 15:58:08,715 (trainer:737) INFO: 3epoch:train:12201-12300batch: iter_time=4.373e-04, forward_time=0.299, loss_ctc=121.773, loss_att=149.237, acc=0.398, loss=140.998, backward_time=0.405, grad_norm=65.711, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.094, optim0_lr0=2.338e-04, train_time=2.727 +[gpub052:0/16] 2024-01-22 16:03:23,856 (trainer:737) INFO: 3epoch:train:12301-12400batch: iter_time=0.001, forward_time=0.523, loss_ctc=99.370, loss_att=129.888, acc=0.367, loss=120.732, backward_time=0.761, grad_norm=51.583, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.130, optim0_lr0=2.353e-04, train_time=3.149 +[gpub052:0/16] 2024-01-22 16:06:00,474 (trainer:737) INFO: 3epoch:train:12401-12500batch: iter_time=0.002, forward_time=0.329, loss_ctc=118.861, loss_att=145.267, acc=0.392, loss=137.345, backward_time=0.407, grad_norm=56.123, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.094, optim0_lr0=2.368e-04, train_time=1.568 +[gpub052:0/16] 2024-01-22 16:06:20,533 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub052:0/16] 2024-01-22 16:06:40,915 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 16:06:44,565 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 16:06:44,565 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub052:0/16] 2024-01-22 16:06:44,568 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 16:21:47,381 (trainer:737) INFO: 3epoch:train:12501-12600batch: iter_time=7.865, forward_time=0.384, loss_ctc=116.454, loss_att=138.577, acc=0.372, loss=131.940, backward_time=0.432, grad_norm=61.446, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.099, optim0_lr0=2.383e-04, train_time=9.469 +[gpub052:0/16] 2024-01-22 16:24:30,937 (trainer:737) INFO: 3epoch:train:12601-12700batch: iter_time=8.097e-05, forward_time=0.481, loss_ctc=104.938, loss_att=138.145, acc=0.386, loss=128.183, backward_time=0.427, grad_norm=59.748, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.097, optim0_lr0=2.398e-04, train_time=1.635 +[gpub052:0/16] 2024-01-22 16:26:46,751 (trainer:737) INFO: 3epoch:train:12701-12800batch: iter_time=0.001, forward_time=0.293, loss_ctc=109.241, loss_att=138.572, acc=0.386, loss=129.773, backward_time=0.405, grad_norm=60.643, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.094, optim0_lr0=2.413e-04, train_time=1.358 +[gpub052:0/16] 2024-01-22 16:29:16,167 
(trainer:737) INFO: 3epoch:train:12801-12900batch: iter_time=8.852e-05, forward_time=0.366, loss_ctc=109.005, loss_att=138.453, acc=0.382, loss=129.619, backward_time=0.452, grad_norm=53.130, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.101, optim0_lr0=2.428e-04, train_time=1.494 +[gpub052:0/16] 2024-01-22 16:31:49,813 (trainer:737) INFO: 3epoch:train:12901-13000batch: iter_time=8.975e-05, forward_time=0.327, loss_ctc=115.447, loss_att=140.594, acc=0.389, loss=133.050, backward_time=0.431, grad_norm=59.457, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.098, optim0_lr0=2.443e-04, train_time=1.536 +[gpub052:0/16] 2024-01-22 16:34:17,744 (trainer:737) INFO: 3epoch:train:13001-13100batch: iter_time=8.774e-05, forward_time=0.396, loss_ctc=103.607, loss_att=133.343, acc=0.378, loss=124.422, backward_time=0.425, grad_norm=52.857, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.100, optim0_lr0=2.458e-04, train_time=1.479 +[gpub052:0/16] 2024-01-22 16:38:09,017 (trainer:737) INFO: 3epoch:train:13101-13200batch: iter_time=8.680e-05, forward_time=0.539, loss_ctc=103.036, loss_att=131.193, acc=0.412, loss=122.746, backward_time=0.557, grad_norm=49.190, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.118, optim0_lr0=2.473e-04, train_time=2.312 +[gpub052:0/16] 2024-01-22 16:41:22,366 (trainer:737) INFO: 3epoch:train:13201-13300batch: iter_time=8.306e-05, forward_time=0.350, loss_ctc=129.776, loss_att=149.167, acc=0.375, loss=143.350, backward_time=0.414, grad_norm=64.970, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.094, optim0_lr0=2.488e-04, train_time=1.934 +[gpub052:0/16] 2024-01-22 16:44:36,881 (trainer:737) INFO: 3epoch:train:13301-13400batch: iter_time=8.489e-05, forward_time=0.598, loss_ctc=106.115, loss_att=132.149, acc=0.427, loss=124.339, backward_time=0.495, grad_norm=55.311, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.150, optim0_lr0=2.503e-04, train_time=1.943 +[gpub052:0/16] 2024-01-22 16:48:02,996 (trainer:737) INFO: 
3epoch:train:13401-13500batch: iter_time=9.492e-05, forward_time=0.323, loss_ctc=104.672, loss_att=128.553, acc=0.435, loss=121.388, backward_time=0.401, grad_norm=54.788, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.095, optim0_lr0=2.518e-04, train_time=2.061 +[gpub052:0/16] 2024-01-22 16:51:25,056 (trainer:737) INFO: 3epoch:train:13501-13600batch: iter_time=9.024e-05, forward_time=0.690, loss_ctc=103.498, loss_att=119.164, acc=0.418, loss=114.464, backward_time=0.467, grad_norm=62.291, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.101, optim0_lr0=2.533e-04, train_time=2.020 +[gpub052:0/16] 2024-01-22 16:55:21,661 (trainer:737) INFO: 3epoch:train:13601-13700batch: iter_time=0.002, forward_time=0.517, loss_ctc=112.942, loss_att=138.708, acc=0.400, loss=130.978, backward_time=0.594, grad_norm=52.201, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.113, optim0_lr0=2.548e-04, train_time=2.365 +[gpub052:0/16] 2024-01-22 16:57:09,192 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+ +gpub054:3241516:3241579 [0] misc/ibvwrap.cc:230 NCCL WARN Call to ibv_get_async_event failed +[gpub052:0/16] 2024-01-22 16:57:29,611 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 16:57:33,196 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 16:57:33,196 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub052:0/16] 2024-01-22 16:57:33,200 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 17:10:04,841 (trainer:737) INFO: 3epoch:train:13701-13800batch: iter_time=7.071, forward_time=0.378, loss_ctc=99.347, loss_att=115.622, acc=0.425, loss=110.740, backward_time=0.424, grad_norm=54.088, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.095, optim0_lr0=2.563e-04, train_time=8.833 +[gpub052:0/16] 2024-01-22 17:13:58,620 (trainer:737) INFO: 3epoch:train:13801-13900batch: iter_time=8.173e-05, forward_time=0.507, loss_ctc=119.308, loss_att=137.076, acc=0.396, loss=131.746, backward_time=0.572, grad_norm=63.856, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.116, optim0_lr0=2.578e-04, train_time=2.337 +[gpub052:0/16] 2024-01-22 17:17:30,506 (trainer:737) INFO: 3epoch:train:13901-14000batch: iter_time=8.026e-05, forward_time=0.677, loss_ctc=105.777, loss_att=135.396, acc=0.426, loss=126.510, backward_time=0.512, grad_norm=52.613, clip=100.000, loss_scale=1.374e+11, 
optim_step_time=0.111, optim0_lr0=2.593e-04, train_time=2.117 +[gpub052:0/16] 2024-01-22 17:20:13,107 (trainer:737) INFO: 3epoch:train:14001-14100batch: iter_time=8.125e-05, forward_time=0.294, loss_ctc=96.728, loss_att=111.695, acc=0.403, loss=107.205, backward_time=0.410, grad_norm=53.172, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.094, optim0_lr0=2.608e-04, train_time=1.627 +[gpub052:0/16] 2024-01-22 17:24:11,314 (trainer:737) INFO: 3epoch:train:14101-14200batch: iter_time=0.001, forward_time=0.570, loss_ctc=120.348, loss_att=149.901, acc=0.421, loss=141.035, backward_time=0.521, grad_norm=63.433, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.125, optim0_lr0=2.623e-04, train_time=2.381 +[gpub052:0/16] 2024-01-22 17:27:33,089 (trainer:737) INFO: 3epoch:train:14201-14300batch: iter_time=8.311e-05, forward_time=0.758, loss_ctc=99.757, loss_att=118.130, acc=0.427, loss=112.618, backward_time=0.489, grad_norm=51.692, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.108, optim0_lr0=2.638e-04, train_time=2.019 +[gpub052:0/16] 2024-01-22 17:30:39,765 (trainer:737) INFO: 3epoch:train:14301-14400batch: iter_time=3.432e-04, forward_time=0.451, loss_ctc=96.953, loss_att=120.370, acc=0.423, loss=113.345, backward_time=0.536, grad_norm=48.298, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.117, optim0_lr0=2.653e-04, train_time=1.865 +[gpub052:0/16] 2024-01-22 17:35:36,190 (trainer:737) INFO: 3epoch:train:14401-14500batch: iter_time=5.346e-04, forward_time=0.726, loss_ctc=112.654, loss_att=139.141, acc=0.400, loss=131.195, backward_time=0.507, grad_norm=53.530, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.141, optim0_lr0=2.668e-04, train_time=2.965 +[gpub052:0/16] 2024-01-22 17:38:10,492 (trainer:737) INFO: 3epoch:train:14501-14600batch: iter_time=2.540e-04, forward_time=0.366, loss_ctc=123.881, loss_att=145.076, acc=0.424, loss=138.717, backward_time=0.410, grad_norm=62.783, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.097, 
optim0_lr0=2.683e-04, train_time=1.540 +[gpub052:0/16] 2024-01-22 17:42:01,979 (trainer:737) INFO: 3epoch:train:14601-14700batch: iter_time=8.081e-05, forward_time=0.661, loss_ctc=90.493, loss_att=107.280, acc=0.468, loss=102.244, backward_time=0.490, grad_norm=50.053, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.196, optim0_lr0=2.698e-04, train_time=2.316 +[gpub052:0/16] 2024-01-22 17:46:22,057 (trainer:737) INFO: 3epoch:train:14701-14800batch: iter_time=8.209e-05, forward_time=0.633, loss_ctc=114.889, loss_att=126.375, acc=0.464, loss=122.929, backward_time=0.448, grad_norm=59.215, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.127, optim0_lr0=2.713e-04, train_time=2.597 +[gpub052:0/16] 2024-01-22 17:49:01,364 (trainer:737) INFO: 3epoch:train:14801-14900batch: iter_time=3.005e-04, forward_time=0.297, loss_ctc=95.049, loss_att=113.094, acc=0.423, loss=107.680, backward_time=0.411, grad_norm=54.421, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.095, optim0_lr0=2.728e-04, train_time=1.597 +[gpub052:0/16] 2024-01-22 17:53:25,039 (trainer:737) INFO: 3epoch:train:14901-15000batch: iter_time=3.913e-04, forward_time=0.655, loss_ctc=113.689, loss_att=125.617, acc=0.452, loss=122.039, backward_time=0.619, grad_norm=55.788, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.120, optim0_lr0=2.743e-04, train_time=2.635 + +gpub054:3241516:3241583 [3] proxy.cc:1059 NCCL WARN [Proxy Service] Poll failed with error 1 +[gpub052:0/16] 2024-01-22 18:40:43,570 (trainer:343) INFO: 3epoch results: [train] iter_time=0.590, forward_time=0.436, loss_ctc=129.551, loss_att=173.511, acc=0.291, loss=160.323, backward_time=0.470, grad_norm=57.062, clip=100.000, loss_scale=5.469e+10, optim_step_time=0.105, optim0_lr0=1.625e-04, train_time=2.831, time=11 hours, 48 minutes and 17.87 seconds, total_count=45000, gpu_max_cached_mem_GB=40.713, [valid] loss_ctc=101.939, cer_ctc=0.487, loss_att=102.070, acc=0.327, cer=0.565, wer=1.000, loss=102.030, time=46 minutes and 53.89 
seconds, total_count=14013, gpu_max_cached_mem_GB=40.713 +[gpub052:0/16] 2024-01-22 18:41:23,025 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub052:0/16] 2024-01-22 18:41:23,026 (trainer:272) INFO: 4/45epoch started. Estimated time to finish: 3 weeks, 1 day and 1 hour +[gpub052:0/16] 2024-01-22 18:41:23,036 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub052:0/16] 2024-01-22 18:41:42,037 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 18:41:45,554 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 18:41:45,554 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub052:0/16] 2024-01-22 18:41:45,557 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 18:54:43,453 (trainer:737) INFO: 4epoch:train:1-100batch: iter_time=6.491, forward_time=0.431, loss_ctc=102.721, loss_att=134.870, acc=0.438, loss=125.226, backward_time=0.450, grad_norm=49.335, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.098, optim0_lr0=2.758e-04, train_time=8.004 +[gpub052:0/16] 2024-01-22 18:57:12,087 (trainer:737) INFO: 4epoch:train:101-200batch: iter_time=8.015e-05, forward_time=0.323, loss_ctc=108.679, loss_att=115.329, acc=0.455, loss=113.334, backward_time=0.407, grad_norm=58.778, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.103, 
optim0_lr0=2.773e-04, train_time=1.486 +[gpub052:0/16] 2024-01-22 19:00:59,519 (trainer:737) INFO: 4epoch:train:201-300batch: iter_time=7.988e-05, forward_time=0.545, loss_ctc=105.523, loss_att=129.595, acc=0.411, loss=122.373, backward_time=0.481, grad_norm=55.661, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.123, optim0_lr0=2.788e-04, train_time=2.275 +[gpub052:0/16] 2024-01-22 19:03:34,097 (trainer:737) INFO: 4epoch:train:301-400batch: iter_time=8.337e-05, forward_time=0.293, loss_ctc=103.589, loss_att=117.914, acc=0.459, loss=113.617, backward_time=0.406, grad_norm=54.214, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.095, optim0_lr0=2.803e-04, train_time=1.546 +[gpub052:0/16] 2024-01-22 19:07:21,620 (trainer:737) INFO: 4epoch:train:401-500batch: iter_time=8.076e-05, forward_time=0.509, loss_ctc=95.065, loss_att=107.264, acc=0.439, loss=103.604, backward_time=0.566, grad_norm=49.986, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.120, optim0_lr0=2.818e-04, train_time=2.274 +[gpub052:0/16] 2024-01-22 19:10:12,854 (trainer:737) INFO: 4epoch:train:501-600batch: iter_time=8.022e-05, forward_time=0.293, loss_ctc=124.618, loss_att=138.728, acc=0.427, loss=134.495, backward_time=0.403, grad_norm=62.402, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.095, optim0_lr0=2.833e-04, train_time=1.713 +[gpub052:0/16] 2024-01-22 19:13:34,209 (trainer:737) INFO: 4epoch:train:601-700batch: iter_time=8.260e-05, forward_time=0.564, loss_ctc=106.494, loss_att=113.098, acc=0.436, loss=111.117, backward_time=0.474, grad_norm=51.388, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.156, optim0_lr0=2.848e-04, train_time=2.011 +[gpub052:0/16] 2024-01-22 19:16:59,809 (trainer:737) INFO: 4epoch:train:701-800batch: iter_time=8.944e-05, forward_time=0.290, loss_ctc=104.552, loss_att=122.921, acc=0.435, loss=117.410, backward_time=0.398, grad_norm=54.619, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.094, optim0_lr0=2.863e-04, train_time=2.058 
+[gpub052:0/16] 2024-01-22 19:19:56,767 (trainer:737) INFO: 4epoch:train:801-900batch: iter_time=8.282e-04, forward_time=0.294, loss_ctc=120.090, loss_att=142.078, acc=0.432, loss=135.482, backward_time=0.404, grad_norm=57.593, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.097, optim0_lr0=2.878e-04, train_time=1.768 +[gpub052:0/16] 2024-01-22 19:23:22,824 (trainer:737) INFO: 4epoch:train:901-1000batch: iter_time=8.504e-05, forward_time=0.466, loss_ctc=95.242, loss_att=114.233, acc=0.472, loss=108.536, backward_time=0.563, grad_norm=46.783, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.111, optim0_lr0=2.893e-04, train_time=2.060 +[gpub052:0/16] 2024-01-22 19:26:08,155 (trainer:737) INFO: 4epoch:train:1001-1100batch: iter_time=8.478e-05, forward_time=0.290, loss_ctc=97.166, loss_att=98.750, acc=0.479, loss=98.275, backward_time=0.400, grad_norm=53.053, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.095, optim0_lr0=2.908e-04, train_time=1.655 +[gpub052:0/16] 2024-01-22 19:29:36,790 (trainer:737) INFO: 4epoch:train:1101-1200batch: iter_time=1.015e-04, forward_time=0.493, loss_ctc=111.889, loss_att=116.813, acc=0.451, loss=115.336, backward_time=0.520, grad_norm=57.702, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.113, optim0_lr0=2.923e-04, train_time=2.086 +[gpub052:0/16] 2024-01-22 19:31:04,120 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub052:0/16] 2024-01-22 19:31:24,000 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 19:31:27,682 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 19:31:27,682 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub052:0/16] 2024-01-22 19:31:27,686 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 19:41:15,068 (trainer:737) INFO: 4epoch:train:1201-1300batch: iter_time=5.595, forward_time=0.292, loss_ctc=96.477, loss_att=116.176, acc=0.459, loss=110.266, backward_time=0.410, grad_norm=49.422, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.094, optim0_lr0=2.938e-04, train_time=6.983 +[gpub052:0/16] 2024-01-22 19:43:45,285 (trainer:737) INFO: 4epoch:train:1301-1400batch: iter_time=8.387e-05, forward_time=0.292, loss_ctc=106.128, loss_att=119.030, acc=0.481, loss=115.159, backward_time=0.404, grad_norm=53.020, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.094, optim0_lr0=2.953e-04, train_time=1.502 +[gpub052:0/16] 2024-01-22 19:46:38,208 (trainer:737) INFO: 4epoch:train:1401-1500batch: iter_time=0.002, forward_time=0.447, loss_ctc=103.150, loss_att=127.873, acc=0.443, loss=120.456, backward_time=0.449, grad_norm=53.725, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.108, optim0_lr0=2.968e-04, train_time=1.727 +[gpub052:0/16] 2024-01-22 19:48:54,310 (trainer:737) INFO: 
4epoch:train:1501-1600batch: iter_time=8.653e-05, forward_time=0.290, loss_ctc=94.525, loss_att=109.548, acc=0.450, loss=105.041, backward_time=0.402, grad_norm=46.833, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.095, optim0_lr0=2.983e-04, train_time=1.362 +[gpub052:0/16] 2024-01-22 19:51:54,339 (trainer:737) INFO: 4epoch:train:1601-1700batch: iter_time=8.408e-05, forward_time=0.292, loss_ctc=103.499, loss_att=105.138, acc=0.487, loss=104.647, backward_time=0.400, grad_norm=51.479, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.094, optim0_lr0=2.998e-04, train_time=1.801 +[gpub052:0/16] 2024-01-22 19:56:17,699 (trainer:737) INFO: 4epoch:train:1701-1800batch: iter_time=8.563e-05, forward_time=0.487, loss_ctc=97.788, loss_att=106.221, acc=0.454, loss=103.691, backward_time=0.527, grad_norm=52.135, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.105, optim0_lr0=3.013e-04, train_time=2.633 +[gpub052:0/16] 2024-01-22 19:58:54,810 (trainer:737) INFO: 4epoch:train:1801-1900batch: iter_time=8.223e-05, forward_time=0.415, loss_ctc=114.647, loss_att=128.663, acc=0.441, loss=124.458, backward_time=0.431, grad_norm=56.391, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.094, optim0_lr0=3.028e-04, train_time=1.571 +[gpub052:0/16] 2024-01-22 20:01:09,924 (trainer:737) INFO: 4epoch:train:1901-2000batch: iter_time=8.469e-05, forward_time=0.290, loss_ctc=106.921, loss_att=110.359, acc=0.464, loss=109.328, backward_time=0.401, grad_norm=52.576, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.094, optim0_lr0=3.043e-04, train_time=1.348 +[gpub052:0/16] 2024-01-22 20:03:57,052 (trainer:737) INFO: 4epoch:train:2001-2100batch: iter_time=9.745e-04, forward_time=0.308, loss_ctc=115.620, loss_att=138.348, acc=0.446, loss=131.529, backward_time=0.420, grad_norm=52.839, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.097, optim0_lr0=3.058e-04, train_time=1.675 +[gpub052:0/16] 2024-01-22 20:07:44,614 (trainer:737) INFO: 4epoch:train:2101-2200batch: 
iter_time=8.214e-05, forward_time=0.545, loss_ctc=90.686, loss_att=103.736, acc=0.469, loss=99.821, backward_time=0.518, grad_norm=46.206, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.106, optim0_lr0=3.073e-04, train_time=2.275 +[gpub052:0/16] 2024-01-22 20:09:45,536 (trainer:737) INFO: 4epoch:train:2201-2300batch: iter_time=8.183e-05, forward_time=0.291, loss_ctc=90.250, loss_att=106.626, acc=0.488, loss=101.713, backward_time=0.405, grad_norm=44.047, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.094, optim0_lr0=3.088e-04, train_time=1.209 +[gpub052:0/16] 2024-01-22 20:12:37,663 (trainer:737) INFO: 4epoch:train:2301-2400batch: iter_time=8.353e-05, forward_time=0.292, loss_ctc=109.421, loss_att=101.950, acc=0.492, loss=104.192, backward_time=0.401, grad_norm=58.166, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.096, optim0_lr0=3.103e-04, train_time=1.719 +[gpub052:0/16] 2024-01-22 20:16:15,450 (trainer:737) INFO: 4epoch:train:2401-2500batch: iter_time=0.002, forward_time=0.521, loss_ctc=96.448, loss_att=102.637, acc=0.500, loss=100.780, backward_time=0.524, grad_norm=48.423, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.120, optim0_lr0=3.118e-04, train_time=2.181 +[gpub052:0/16] 2024-01-22 20:16:35,496 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub052:0/16] 2024-01-22 20:16:55,354 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 20:16:59,339 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 20:16:59,339 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub052:0/16] 2024-01-22 20:16:59,343 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 20:30:44,888 (trainer:737) INFO: 4epoch:train:2501-2600batch: iter_time=6.799, forward_time=0.292, loss_ctc=97.552, loss_att=132.380, acc=0.469, loss=121.931, backward_time=0.405, grad_norm=53.296, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.095, optim0_lr0=3.133e-04, train_time=8.694 +[gpub052:0/16] 2024-01-22 20:33:58,230 (trainer:737) INFO: 4epoch:train:2601-2700batch: iter_time=0.001, forward_time=0.551, loss_ctc=103.168, loss_att=117.520, acc=0.486, loss=113.214, backward_time=0.491, grad_norm=58.918, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.121, optim0_lr0=3.148e-04, train_time=1.931 +[gpub052:0/16] 2024-01-22 20:36:44,161 (trainer:737) INFO: 4epoch:train:2701-2800batch: iter_time=8.614e-05, forward_time=0.291, loss_ctc=100.606, loss_att=124.744, acc=0.444, loss=117.503, backward_time=0.400, grad_norm=51.030, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.095, optim0_lr0=3.163e-04, train_time=1.659 +[gpub052:0/16] 2024-01-22 20:39:49,168 (trainer:737) INFO: 
4epoch:train:2801-2900batch: iter_time=2.512e-04, forward_time=0.326, loss_ctc=98.807, loss_att=110.894, acc=0.501, loss=107.268, backward_time=0.445, grad_norm=51.324, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.100, optim0_lr0=3.178e-04, train_time=1.852 +[gpub052:0/16] 2024-01-22 20:42:50,999 (trainer:737) INFO: 4epoch:train:2901-3000batch: iter_time=8.860e-05, forward_time=0.463, loss_ctc=90.170, loss_att=100.679, acc=0.489, loss=97.526, backward_time=0.545, grad_norm=47.451, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.108, optim0_lr0=3.193e-04, train_time=1.818 +[gpub052:0/16] 2024-01-22 20:45:25,244 (trainer:737) INFO: 4epoch:train:3001-3100batch: iter_time=4.813e-04, forward_time=0.332, loss_ctc=120.109, loss_att=137.089, acc=0.457, loss=131.995, backward_time=0.416, grad_norm=57.888, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.099, optim0_lr0=3.208e-04, train_time=1.540 +[gpub052:0/16] 2024-01-22 20:48:43,774 (trainer:737) INFO: 4epoch:train:3101-3200batch: iter_time=8.518e-05, forward_time=0.451, loss_ctc=100.290, loss_att=105.460, acc=0.479, loss=103.909, backward_time=0.472, grad_norm=50.996, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.106, optim0_lr0=3.223e-04, train_time=1.987 +[gpub052:0/16] 2024-01-22 20:51:15,846 (trainer:737) INFO: 4epoch:train:3201-3300batch: iter_time=3.210e-04, forward_time=0.312, loss_ctc=98.129, loss_att=120.334, acc=0.465, loss=113.672, backward_time=0.405, grad_norm=49.224, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.095, optim0_lr0=3.238e-04, train_time=1.521 +[gpub052:0/16] 2024-01-22 20:54:30,960 (trainer:737) INFO: 4epoch:train:3301-3400batch: iter_time=8.233e-05, forward_time=0.550, loss_ctc=114.734, loss_att=134.516, acc=0.477, loss=128.582, backward_time=0.504, grad_norm=55.482, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.108, optim0_lr0=3.253e-04, train_time=1.948 +[gpub052:0/16] 2024-01-22 20:57:44,121 (trainer:737) INFO: 4epoch:train:3401-3500batch: 
iter_time=9.744e-05, forward_time=0.292, loss_ctc=90.139, loss_att=109.294, acc=0.512, loss=103.548, backward_time=0.402, grad_norm=44.963, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.099, optim0_lr0=3.268e-04, train_time=1.934 +[gpub052:0/16] 2024-01-22 21:00:59,337 (trainer:737) INFO: 4epoch:train:3501-3600batch: iter_time=1.029e-04, forward_time=0.579, loss_ctc=92.778, loss_att=90.607, acc=0.513, loss=91.259, backward_time=0.517, grad_norm=50.277, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.130, optim0_lr0=3.283e-04, train_time=1.952 +[gpub052:0/16] 2024-01-22 21:04:21,894 (trainer:737) INFO: 4epoch:train:3601-3700batch: iter_time=8.484e-05, forward_time=0.301, loss_ctc=106.074, loss_att=107.950, acc=0.491, loss=107.387, backward_time=0.404, grad_norm=56.387, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.096, optim0_lr0=3.298e-04, train_time=2.023 +[gpub052:0/16] 2024-01-22 21:06:09,254 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub052:0/16] 2024-01-22 21:06:29,254 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 21:06:32,886 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 21:06:32,886 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub052:0/16] 2024-01-22 21:06:32,889 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 21:21:32,863 
(trainer:737) INFO: 4epoch:train:3701-3800batch: iter_time=7.934, forward_time=0.746, loss_ctc=91.137, loss_att=111.375, acc=0.495, loss=105.304, backward_time=0.501, grad_norm=46.442, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.116, optim0_lr0=3.313e-04, train_time=10.312 +[gpub052:0/16] 2024-01-22 21:26:00,413 (trainer:737) INFO: 4epoch:train:3801-3900batch: iter_time=0.167, forward_time=0.297, loss_ctc=102.310, loss_att=115.360, acc=0.513, loss=111.445, backward_time=0.415, grad_norm=54.301, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.094, optim0_lr0=3.328e-04, train_time=2.675 +[gpub052:0/16] 2024-01-22 21:31:10,267 (trainer:737) INFO: 4epoch:train:3901-4000batch: iter_time=8.322e-05, forward_time=0.599, loss_ctc=97.461, loss_att=123.955, acc=0.471, loss=116.007, backward_time=0.519, grad_norm=55.324, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.143, optim0_lr0=3.343e-04, train_time=3.098 +[gpub052:0/16] 2024-01-22 21:33:51,262 (trainer:737) INFO: 4epoch:train:4001-4100batch: iter_time=8.562e-05, forward_time=0.289, loss_ctc=89.303, loss_att=100.770, acc=0.490, loss=97.330, backward_time=0.406, grad_norm=46.203, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.095, optim0_lr0=3.358e-04, train_time=1.606 +[gpub052:0/16] 2024-01-22 21:37:36,331 (trainer:737) INFO: 4epoch:train:4101-4200batch: iter_time=9.558e-05, forward_time=0.409, loss_ctc=98.042, loss_att=98.049, acc=0.527, loss=98.047, backward_time=0.428, grad_norm=48.356, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.104, optim0_lr0=3.373e-04, train_time=2.254 +[gpub052:0/16] 2024-01-22 21:40:39,249 (trainer:737) INFO: 4epoch:train:4201-4300batch: iter_time=8.102e-05, forward_time=0.475, loss_ctc=94.870, loss_att=98.856, acc=0.491, loss=97.660, backward_time=0.461, grad_norm=51.806, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.102, optim0_lr0=3.388e-04, train_time=1.829 +[gpub052:0/16] 2024-01-22 21:43:36,767 (trainer:737) INFO: 4epoch:train:4301-4400batch: 
iter_time=2.595e-04, forward_time=0.335, loss_ctc=109.948, loss_att=126.674, acc=0.464, loss=121.656, backward_time=0.517, grad_norm=55.438, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.105, optim0_lr0=3.403e-04, train_time=1.775 +[gpub052:0/16] 2024-01-22 21:48:30,837 (trainer:737) INFO: 4epoch:train:4401-4500batch: iter_time=8.195e-05, forward_time=0.560, loss_ctc=103.386, loss_att=106.288, acc=0.495, loss=105.418, backward_time=0.468, grad_norm=50.079, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.102, optim0_lr0=3.418e-04, train_time=2.939 +[gpub052:0/16] 2024-01-22 21:51:06,096 (trainer:737) INFO: 4epoch:train:4501-4600batch: iter_time=8.238e-05, forward_time=0.296, loss_ctc=110.946, loss_att=132.659, acc=0.483, loss=126.145, backward_time=0.408, grad_norm=52.862, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.095, optim0_lr0=3.433e-04, train_time=1.554 +[gpub052:0/16] 2024-01-22 21:56:17,793 (trainer:737) INFO: 4epoch:train:4601-4700batch: iter_time=3.321e-04, forward_time=0.558, loss_ctc=88.269, loss_att=99.804, acc=0.505, loss=96.344, backward_time=0.601, grad_norm=46.711, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.118, optim0_lr0=3.448e-04, train_time=3.115 +[gpub052:0/16] 2024-01-22 22:01:22,862 (trainer:737) INFO: 4epoch:train:4701-4800batch: iter_time=0.137, forward_time=0.297, loss_ctc=86.909, loss_att=100.168, acc=0.518, loss=96.190, backward_time=0.422, grad_norm=44.094, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.095, optim0_lr0=3.463e-04, train_time=3.052 +[gpub052:0/16] 2024-01-22 22:04:03,018 (trainer:737) INFO: 4epoch:train:4801-4900batch: iter_time=8.466e-05, forward_time=0.292, loss_ctc=104.761, loss_att=95.696, acc=0.520, loss=98.415, backward_time=0.405, grad_norm=55.750, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.094, optim0_lr0=3.478e-04, train_time=1.601 +[gpub052:0/16] 2024-01-22 22:08:11,052 (trainer:737) INFO: 4epoch:train:4901-5000batch: iter_time=3.270e-04, forward_time=0.554, 
loss_ctc=91.561, loss_att=95.842, acc=0.535, loss=94.558, backward_time=0.548, grad_norm=44.837, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.134, optim0_lr0=3.493e-04, train_time=2.478 +[gpub052:0/16] 2024-01-22 22:08:31,097 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub052:0/16] 2024-01-22 22:08:51,263 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 22:08:54,821 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 22:08:54,821 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub052:0/16] 2024-01-22 22:08:54,825 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 22:23:14,549 (trainer:737) INFO: 4epoch:train:5001-5100batch: iter_time=7.263, forward_time=0.385, loss_ctc=93.789, loss_att=116.610, acc=0.498, loss=109.764, backward_time=0.429, grad_norm=49.035, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.118, optim0_lr0=3.508e-04, train_time=9.037 +[gpub052:0/16] 2024-01-22 22:26:28,591 (trainer:737) INFO: 4epoch:train:5101-5200batch: iter_time=8.244e-05, forward_time=0.466, loss_ctc=99.626, loss_att=102.109, acc=0.514, loss=101.364, backward_time=0.486, grad_norm=54.983, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.101, optim0_lr0=3.523e-04, train_time=1.941 +[gpub052:0/16] 2024-01-22 22:29:28,188 (trainer:737) INFO: 
4epoch:train:5201-5300batch: iter_time=8.190e-05, forward_time=0.290, loss_ctc=94.575, loss_att=109.815, acc=0.471, loss=105.243, backward_time=0.400, grad_norm=47.880, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.538e-04, train_time=1.794 +[gpub052:0/16] 2024-01-22 22:33:00,047 (trainer:737) INFO: 4epoch:train:5301-5400batch: iter_time=8.412e-05, forward_time=0.492, loss_ctc=94.123, loss_att=100.733, acc=0.525, loss=98.750, backward_time=0.546, grad_norm=47.747, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.122, optim0_lr0=3.553e-04, train_time=2.119 +[gpub052:0/16] 2024-01-22 22:35:40,629 (trainer:737) INFO: 4epoch:train:5401-5500batch: iter_time=8.338e-05, forward_time=0.288, loss_ctc=87.863, loss_att=92.652, acc=0.498, loss=91.215, backward_time=0.400, grad_norm=47.081, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.568e-04, train_time=1.607 +[gpub052:0/16] 2024-01-22 22:38:34,712 (trainer:737) INFO: 4epoch:train:5501-5600batch: iter_time=8.148e-05, forward_time=0.381, loss_ctc=115.289, loss_att=123.442, acc=0.481, loss=120.996, backward_time=0.416, grad_norm=57.669, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.097, optim0_lr0=3.583e-04, train_time=1.740 +[gpub052:0/16] 2024-01-22 22:41:09,460 (trainer:737) INFO: 4epoch:train:5601-5700batch: iter_time=8.304e-05, forward_time=0.416, loss_ctc=96.330, loss_att=94.079, acc=0.501, loss=94.754, backward_time=0.431, grad_norm=47.167, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.102, optim0_lr0=3.598e-04, train_time=1.547 +[gpub052:0/16] 2024-01-22 22:44:18,086 (trainer:737) INFO: 4epoch:train:5701-5800batch: iter_time=8.223e-05, forward_time=0.292, loss_ctc=95.855, loss_att=106.560, acc=0.492, loss=103.349, backward_time=0.400, grad_norm=48.587, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.613e-04, train_time=1.887 +[gpub052:0/16] 2024-01-22 22:47:28,037 (trainer:737) INFO: 4epoch:train:5801-5900batch: 
iter_time=7.072e-04, forward_time=0.323, loss_ctc=111.353, loss_att=125.036, acc=0.486, loss=120.931, backward_time=0.438, grad_norm=54.065, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.106, optim0_lr0=3.628e-04, train_time=1.897 +[gpub052:0/16] 2024-01-22 22:50:17,499 (trainer:737) INFO: 4epoch:train:5901-6000batch: iter_time=2.015e-04, forward_time=0.391, loss_ctc=86.613, loss_att=98.841, acc=0.528, loss=95.173, backward_time=0.463, grad_norm=42.601, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.103, optim0_lr0=3.643e-04, train_time=1.694 +[gpub052:0/16] 2024-01-22 22:52:50,124 (trainer:737) INFO: 4epoch:train:6001-6100batch: iter_time=8.522e-05, forward_time=0.289, loss_ctc=89.656, loss_att=83.830, acc=0.531, loss=85.578, backward_time=0.401, grad_norm=49.047, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.658e-04, train_time=1.528 +[gpub052:0/16] 2024-01-22 22:55:29,586 (trainer:737) INFO: 4epoch:train:6101-6200batch: iter_time=9.861e-05, forward_time=0.385, loss_ctc=102.780, loss_att=98.541, acc=0.512, loss=99.813, backward_time=0.445, grad_norm=54.908, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.104, optim0_lr0=3.673e-04, train_time=1.593 +[gpub052:0/16] 2024-01-22 22:57:04,857 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub052:0/16] 2024-01-22 22:57:24,535 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 22:57:28,099 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 22:57:28,099 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub052:0/16] 2024-01-22 22:57:28,103 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 23:03:57,961 (trainer:737) INFO: 4epoch:train:6201-6300batch: iter_time=3.631, forward_time=0.309, loss_ctc=88.536, loss_att=101.125, acc=0.513, loss=97.349, backward_time=0.403, grad_norm=46.211, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.688e-04, train_time=5.084 +[gpub052:0/16] 2024-01-22 23:06:19,112 (trainer:737) INFO: 4epoch:train:6301-6400batch: iter_time=8.389e-05, forward_time=0.429, loss_ctc=98.150, loss_att=102.898, acc=0.534, loss=101.474, backward_time=0.484, grad_norm=48.551, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.105, optim0_lr0=3.703e-04, train_time=1.410 +[gpub052:0/16] 2024-01-22 23:08:51,494 (trainer:737) INFO: 4epoch:train:6401-6500batch: iter_time=8.164e-05, forward_time=0.292, loss_ctc=94.739, loss_att=111.201, acc=0.493, loss=106.263, backward_time=0.404, grad_norm=49.175, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.718e-04, train_time=1.524 +[gpub052:0/16] 2024-01-22 23:11:49,346 (trainer:737) 
INFO: 4epoch:train:6501-6600batch: iter_time=8.837e-05, forward_time=0.441, loss_ctc=86.454, loss_att=94.895, acc=0.505, loss=92.363, backward_time=0.462, grad_norm=45.219, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.104, optim0_lr0=3.733e-04, train_time=1.778 +[gpub052:0/16] 2024-01-22 23:13:46,735 (trainer:737) INFO: 4epoch:train:6601-6700batch: iter_time=8.107e-05, forward_time=0.309, loss_ctc=95.104, loss_att=91.264, acc=0.538, loss=92.416, backward_time=0.405, grad_norm=46.611, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.748e-04, train_time=1.173 +[gpub052:0/16] 2024-01-22 23:17:08,143 (trainer:737) INFO: 4epoch:train:6701-6800batch: iter_time=8.314e-05, forward_time=0.511, loss_ctc=91.403, loss_att=92.167, acc=0.500, loss=91.938, backward_time=0.444, grad_norm=51.462, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.105, optim0_lr0=3.763e-04, train_time=2.015 +[gpub052:0/16] 2024-01-22 23:19:41,507 (trainer:737) INFO: 4epoch:train:6801-6900batch: iter_time=8.437e-05, forward_time=0.292, loss_ctc=109.369, loss_att=115.948, acc=0.488, loss=113.974, backward_time=0.406, grad_norm=54.088, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.095, optim0_lr0=3.778e-04, train_time=1.533 +[gpub052:0/16] 2024-01-22 23:22:11,030 (trainer:737) INFO: 4epoch:train:6901-7000batch: iter_time=5.844e-04, forward_time=0.478, loss_ctc=100.250, loss_att=95.949, acc=0.513, loss=97.240, backward_time=0.450, grad_norm=49.970, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.113, optim0_lr0=3.793e-04, train_time=1.495 +[gpub052:0/16] 2024-01-22 23:25:06,255 (trainer:737) INFO: 4epoch:train:7001-7100batch: iter_time=7.834e-04, forward_time=0.380, loss_ctc=107.739, loss_att=122.664, acc=0.493, loss=118.186, backward_time=0.410, grad_norm=50.642, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.097, optim0_lr0=3.808e-04, train_time=1.752 +[gpub052:0/16] 2024-01-22 23:27:34,889 (trainer:737) INFO: 4epoch:train:7101-7200batch: 
iter_time=8.247e-05, forward_time=0.402, loss_ctc=85.231, loss_att=91.194, acc=0.516, loss=89.405, backward_time=0.460, grad_norm=43.709, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.100, optim0_lr0=3.823e-04, train_time=1.486 +[gpub052:0/16] 2024-01-22 23:30:14,558 (trainer:737) INFO: 4epoch:train:7201-7300batch: iter_time=6.471e-04, forward_time=0.347, loss_ctc=84.599, loss_att=93.386, acc=0.536, loss=90.750, backward_time=0.408, grad_norm=41.795, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.131, optim0_lr0=3.838e-04, train_time=1.595 +[gpub052:0/16] 2024-01-22 23:32:28,236 (trainer:737) INFO: 4epoch:train:7301-7400batch: iter_time=8.220e-05, forward_time=0.409, loss_ctc=101.760, loss_att=89.221, acc=0.535, loss=92.983, backward_time=0.456, grad_norm=53.786, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.100, optim0_lr0=3.853e-04, train_time=1.338 +[gpub052:0/16] 2024-01-22 23:35:20,564 (trainer:737) INFO: 4epoch:train:7401-7500batch: iter_time=8.243e-05, forward_time=0.290, loss_ctc=89.042, loss_att=88.473, acc=0.547, loss=88.644, backward_time=0.401, grad_norm=43.756, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.096, optim0_lr0=3.868e-04, train_time=1.723 +[gpub052:0/16] 2024-01-22 23:35:41,235 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub052:0/16] 2024-01-22 23:36:00,908 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-22 23:36:04,535 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-22 23:36:04,535 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub052:0/16] 2024-01-22 23:36:04,687 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-22 23:44:51,086 (trainer:737) INFO: 4epoch:train:7501-7600batch: iter_time=4.421, forward_time=0.398, loss_ctc=91.693, loss_att=117.298, acc=0.512, loss=109.616, backward_time=0.423, grad_norm=49.420, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.100, optim0_lr0=3.883e-04, train_time=5.704 +[gpub052:0/16] 2024-01-22 23:47:50,424 (trainer:737) INFO: 4epoch:train:7601-7700batch: iter_time=8.357e-05, forward_time=0.448, loss_ctc=96.592, loss_att=104.234, acc=0.527, loss=101.941, backward_time=0.475, grad_norm=54.486, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.116, optim0_lr0=3.898e-04, train_time=1.792 +[gpub052:0/16] 2024-01-22 23:50:25,150 (trainer:737) INFO: 4epoch:train:7701-7800batch: iter_time=8.007e-05, forward_time=0.292, loss_ctc=92.201, loss_att=107.201, acc=0.493, loss=102.701, backward_time=0.400, grad_norm=47.784, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.096, optim0_lr0=3.913e-04, train_time=1.549 +[gpub052:0/16] 2024-01-22 23:53:43,779 
(trainer:737) INFO: 4epoch:train:7801-7900batch: iter_time=8.276e-05, forward_time=0.393, loss_ctc=91.918, loss_att=97.674, acc=0.546, loss=95.947, backward_time=0.588, grad_norm=47.438, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.109, optim0_lr0=3.928e-04, train_time=1.982 +[gpub052:0/16] 2024-01-22 23:55:51,311 (trainer:737) INFO: 4epoch:train:7901-8000batch: iter_time=8.288e-05, forward_time=0.290, loss_ctc=84.223, loss_att=88.307, acc=0.530, loss=87.082, backward_time=0.403, grad_norm=44.499, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.095, optim0_lr0=3.943e-04, train_time=1.279 +[gpub052:0/16] 2024-01-23 00:00:39,060 (trainer:737) INFO: 4epoch:train:8001-8100batch: iter_time=1.849e-04, forward_time=0.464, loss_ctc=114.189, loss_att=122.838, acc=0.498, loss=120.243, backward_time=0.448, grad_norm=60.089, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.111, optim0_lr0=3.958e-04, train_time=2.874 +[gpub052:0/16] 2024-01-23 00:03:38,914 (trainer:737) INFO: 4epoch:train:8101-8200batch: iter_time=8.292e-05, forward_time=0.296, loss_ctc=93.819, loss_att=92.228, acc=0.521, loss=92.705, backward_time=0.399, grad_norm=47.032, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.095, optim0_lr0=3.973e-04, train_time=1.802 +[gpub052:0/16] 2024-01-23 00:06:44,546 (trainer:737) INFO: 4epoch:train:8201-8300batch: iter_time=8.289e-05, forward_time=0.292, loss_ctc=92.574, loss_att=105.699, acc=0.509, loss=101.761, backward_time=0.406, grad_norm=46.750, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.094, optim0_lr0=3.988e-04, train_time=1.856 +[gpub052:0/16] 2024-01-23 00:09:50,177 (trainer:737) INFO: 4epoch:train:8301-8400batch: iter_time=0.003, forward_time=0.414, loss_ctc=108.082, loss_att=120.372, acc=0.518, loss=116.685, backward_time=0.506, grad_norm=52.063, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.124, optim0_lr0=4.003e-04, train_time=1.854 +[gpub052:0/16] 2024-01-23 00:11:55,875 (trainer:737) INFO: 
4epoch:train:8401-8500batch: iter_time=8.434e-05, forward_time=0.295, loss_ctc=85.123, loss_att=98.247, acc=0.550, loss=94.310, backward_time=0.408, grad_norm=42.644, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.095, optim0_lr0=4.018e-04, train_time=1.259 +[gpub052:0/16] 2024-01-23 00:15:02,191 (trainer:737) INFO: 4epoch:train:8501-8600batch: iter_time=7.658e-04, forward_time=0.493, loss_ctc=86.554, loss_att=79.550, acc=0.550, loss=81.651, backward_time=0.461, grad_norm=48.684, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.125, optim0_lr0=4.033e-04, train_time=1.863 +[gpub052:0/16] 2024-01-23 00:17:52,835 (trainer:737) INFO: 4epoch:train:8601-8700batch: iter_time=8.416e-05, forward_time=0.291, loss_ctc=100.038, loss_att=94.306, acc=0.531, loss=96.026, backward_time=0.401, grad_norm=53.271, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.094, optim0_lr0=4.048e-04, train_time=1.704 +[gpub052:0/16] 2024-01-23 00:19:11,773 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub052:0/16] 2024-01-23 00:19:31,840 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 00:19:35,652 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 00:19:35,652 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub052:0/16] 2024-01-23 00:19:35,655 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 00:27:46,403 (trainer:737) INFO: 4epoch:train:8701-8800batch: iter_time=4.514, forward_time=0.404, loss_ctc=86.177, loss_att=99.775, acc=0.532, loss=95.696, backward_time=0.423, grad_norm=45.331, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.098, optim0_lr0=4.063e-04, train_time=5.938 +[gpub052:0/16] 2024-01-23 00:30:09,059 (trainer:737) INFO: 4epoch:train:8801-8900batch: iter_time=8.262e-05, forward_time=0.294, loss_ctc=97.066, loss_att=103.981, acc=0.548, loss=101.907, backward_time=0.407, grad_norm=52.488, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.095, optim0_lr0=4.078e-04, train_time=1.426 +[gpub052:0/16] 2024-01-23 00:32:51,065 (trainer:737) INFO: 4epoch:train:8901-9000batch: iter_time=8.248e-05, forward_time=0.291, loss_ctc=93.543, loss_att=112.783, acc=0.506, loss=107.011, backward_time=0.405, grad_norm=51.911, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.095, optim0_lr0=4.093e-04, train_time=1.617 +[gpub052:0/16] 2024-01-23 00:35:07,460 (trainer:737) INFO: 
4epoch:train:9001-9100batch: iter_time=8.220e-05, forward_time=0.463, loss_ctc=86.226, loss_att=91.458, acc=0.521, loss=89.889, backward_time=0.454, grad_norm=45.118, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.101, optim0_lr0=4.108e-04, train_time=1.366 +[gpub052:0/16] 2024-01-23 00:37:33,724 (trainer:737) INFO: 4epoch:train:9101-9200batch: iter_time=8.454e-05, forward_time=0.293, loss_ctc=92.898, loss_att=87.681, acc=0.564, loss=89.246, backward_time=0.404, grad_norm=43.941, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.095, optim0_lr0=4.123e-04, train_time=1.462 +[gpub052:0/16] 2024-01-23 00:40:05,460 (trainer:737) INFO: 4epoch:train:9201-9300batch: iter_time=8.692e-05, forward_time=0.290, loss_ctc=87.955, loss_att=88.013, acc=0.530, loss=87.996, backward_time=0.402, grad_norm=45.018, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.095, optim0_lr0=4.138e-04, train_time=1.517 +[gpub052:0/16] 2024-01-23 00:42:31,065 (trainer:737) INFO: 4epoch:train:9301-9400batch: iter_time=5.661e-04, forward_time=0.392, loss_ctc=104.866, loss_att=116.544, acc=0.500, loss=113.040, backward_time=0.429, grad_norm=52.491, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.112, optim0_lr0=4.153e-04, train_time=1.456 +[gpub052:0/16] 2024-01-23 00:44:23,172 (trainer:737) INFO: 4epoch:train:9401-9500batch: iter_time=8.434e-05, forward_time=0.292, loss_ctc=98.099, loss_att=95.608, acc=0.529, loss=96.355, backward_time=0.405, grad_norm=46.088, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.094, optim0_lr0=4.168e-04, train_time=1.121 +[gpub052:0/16] 2024-01-23 00:47:00,057 (trainer:737) INFO: 4epoch:train:9501-9600batch: iter_time=8.944e-05, forward_time=0.295, loss_ctc=104.596, loss_att=120.023, acc=0.520, loss=115.395, backward_time=0.406, grad_norm=47.808, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.094, optim0_lr0=4.183e-04, train_time=1.569 +[gpub052:0/16] 2024-01-23 00:49:11,918 (trainer:737) INFO: 4epoch:train:9601-9700batch: 
iter_time=2.781e-04, forward_time=0.374, loss_ctc=83.526, loss_att=88.598, acc=0.542, loss=87.076, backward_time=0.439, grad_norm=42.397, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.102, optim0_lr0=4.198e-04, train_time=1.318 +[gpub052:0/16] 2024-01-23 00:51:25,322 (trainer:737) INFO: 4epoch:train:9701-9800batch: iter_time=8.362e-05, forward_time=0.315, loss_ctc=82.011, loss_att=91.465, acc=0.550, loss=88.629, backward_time=0.410, grad_norm=41.660, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.097, optim0_lr0=4.213e-04, train_time=1.334 +[gpub052:0/16] 2024-01-23 00:53:40,762 (trainer:737) INFO: 4epoch:train:9801-9900batch: iter_time=8.695e-05, forward_time=0.292, loss_ctc=98.894, loss_att=85.480, acc=0.554, loss=89.504, backward_time=0.405, grad_norm=52.554, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.094, optim0_lr0=4.228e-04, train_time=1.354 +[gpub052:0/16] 2024-01-23 00:55:49,226 (trainer:737) INFO: 4epoch:train:9901-10000batch: iter_time=8.038e-05, forward_time=0.294, loss_ctc=87.310, loss_att=85.261, acc=0.569, loss=85.876, backward_time=0.405, grad_norm=42.480, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.095, optim0_lr0=4.243e-04, train_time=1.284 +[gpub052:0/16] 2024-01-23 00:56:09,264 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub052:0/16] 2024-01-23 00:56:29,051 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 00:56:32,665 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 00:56:32,665 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub052:0/16] 2024-01-23 00:56:32,668 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 01:03:42,631 (trainer:737) INFO: 4epoch:train:10001-10100batch: iter_time=3.466, forward_time=0.370, loss_ctc=89.222, loss_att=107.865, acc=0.528, loss=102.272, backward_time=0.418, grad_norm=46.803, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.097, optim0_lr0=4.258e-04, train_time=4.734 +[gpub052:0/16] 2024-01-23 01:06:29,429 (trainer:737) INFO: 4epoch:train:10101-10200batch: iter_time=8.282e-05, forward_time=0.422, loss_ctc=93.040, loss_att=93.723, acc=0.541, loss=93.518, backward_time=0.442, grad_norm=54.430, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.099, optim0_lr0=4.273e-04, train_time=1.668 +[gpub052:0/16] 2024-01-23 01:08:27,150 (trainer:737) INFO: 4epoch:train:10201-10300batch: iter_time=8.045e-05, forward_time=0.291, loss_ctc=89.762, loss_att=101.653, acc=0.500, loss=98.086, backward_time=0.405, grad_norm=47.987, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.095, optim0_lr0=4.288e-04, train_time=1.177 +[gpub052:0/16] 2024-01-23 01:10:40,718 (trainer:737) 
INFO: 4epoch:train:10301-10400batch: iter_time=8.546e-05, forward_time=0.386, loss_ctc=90.310, loss_att=93.012, acc=0.554, loss=92.201, backward_time=0.454, grad_norm=45.617, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.101, optim0_lr0=4.303e-04, train_time=1.335 +[gpub052:0/16] 2024-01-23 01:13:08,954 (trainer:737) INFO: 4epoch:train:10401-10500batch: iter_time=8.017e-05, forward_time=0.289, loss_ctc=82.157, loss_att=84.706, acc=0.527, loss=83.941, backward_time=0.401, grad_norm=44.339, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.095, optim0_lr0=4.318e-04, train_time=1.482 +[gpub052:0/16] 2024-01-23 01:15:31,727 (trainer:737) INFO: 4epoch:train:10501-10600batch: iter_time=0.004, forward_time=0.385, loss_ctc=113.520, loss_att=115.842, acc=0.507, loss=115.145, backward_time=0.437, grad_norm=57.745, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.104, optim0_lr0=4.333e-04, train_time=1.427 +[gpub052:0/16] 2024-01-23 01:17:40,074 (trainer:737) INFO: 4epoch:train:10601-10700batch: iter_time=7.997e-05, forward_time=0.292, loss_ctc=92.596, loss_att=86.315, acc=0.532, loss=88.199, backward_time=0.409, grad_norm=46.347, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.095, optim0_lr0=4.348e-04, train_time=1.284 +[gpub052:0/16] 2024-01-23 01:19:51,948 (trainer:737) INFO: 4epoch:train:10701-10800batch: iter_time=9.494e-04, forward_time=0.392, loss_ctc=89.790, loss_att=98.552, acc=0.519, loss=95.923, backward_time=0.444, grad_norm=44.291, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.103, optim0_lr0=4.363e-04, train_time=1.318 +[gpub052:0/16] 2024-01-23 01:22:27,689 (trainer:737) INFO: 4epoch:train:10801-10900batch: iter_time=8.190e-05, forward_time=0.304, loss_ctc=106.404, loss_att=116.323, acc=0.513, loss=113.348, backward_time=0.415, grad_norm=53.409, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.100, optim0_lr0=4.378e-04, train_time=1.558 +[gpub052:0/16] 2024-01-23 01:24:32,956 (trainer:737) INFO: 
4epoch:train:10901-11000batch: iter_time=4.484e-04, forward_time=0.364, loss_ctc=82.099, loss_att=91.972, acc=0.557, loss=89.010, backward_time=0.446, grad_norm=39.578, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.102, optim0_lr0=4.393e-04, train_time=1.253 +[gpub052:0/16] 2024-01-23 01:27:09,080 (trainer:737) INFO: 4epoch:train:11001-11100batch: iter_time=0.003, forward_time=0.425, loss_ctc=84.439, loss_att=75.467, acc=0.562, loss=78.159, backward_time=0.427, grad_norm=46.160, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.134, optim0_lr0=4.408e-04, train_time=1.560 +[gpub052:0/16] 2024-01-23 01:29:26,763 (trainer:737) INFO: 4epoch:train:11101-11200batch: iter_time=9.203e-05, forward_time=0.292, loss_ctc=97.454, loss_att=89.872, acc=0.536, loss=92.146, backward_time=0.403, grad_norm=49.376, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.094, optim0_lr0=4.423e-04, train_time=1.378 +[gpub052:0/16] 2024-01-23 01:30:41,365 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub052:0/16] 2024-01-23 01:31:01,012 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 01:31:04,997 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 01:31:04,997 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub052:0/16] 2024-01-23 01:31:05,000 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 01:37:07,071 (trainer:737) INFO: 4epoch:train:11201-11300batch: iter_time=3.402, forward_time=0.369, loss_ctc=83.364, loss_att=95.183, acc=0.543, loss=91.637, backward_time=0.417, grad_norm=41.113, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.096, optim0_lr0=4.438e-04, train_time=4.603 +[gpub052:0/16] 2024-01-23 01:39:21,382 (trainer:737) INFO: 4epoch:train:11301-11400batch: iter_time=2.171e-04, forward_time=0.370, loss_ctc=93.608, loss_att=101.509, acc=0.557, loss=99.139, backward_time=0.464, grad_norm=50.042, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.108, optim0_lr0=4.453e-04, train_time=1.342 +[gpub052:0/16] 2024-01-23 01:41:50,607 (trainer:737) INFO: 4epoch:train:11401-11500batch: iter_time=9.373e-05, forward_time=0.292, loss_ctc=92.097, loss_att=109.062, acc=0.519, loss=103.973, backward_time=0.402, grad_norm=51.847, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.093, optim0_lr0=4.468e-04, train_time=1.492 +[gpub052:0/16] 2024-01-23 01:43:56,402 (trainer:737) 
INFO: 4epoch:train:11501-11600batch: iter_time=8.740e-05, forward_time=0.319, loss_ctc=82.907, loss_att=87.809, acc=0.538, loss=86.338, backward_time=0.404, grad_norm=42.936, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.095, optim0_lr0=4.483e-04, train_time=1.258 +[gpub052:0/16] 2024-01-23 01:46:34,468 (trainer:737) INFO: 4epoch:train:11601-11700batch: iter_time=8.983e-05, forward_time=0.407, loss_ctc=91.553, loss_att=85.445, acc=0.575, loss=87.278, backward_time=0.432, grad_norm=42.382, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.098, optim0_lr0=4.498e-04, train_time=1.580 +[gpub052:0/16] 2024-01-23 01:48:45,279 (trainer:737) INFO: 4epoch:train:11701-11800batch: iter_time=8.724e-05, forward_time=0.291, loss_ctc=86.886, loss_att=85.596, acc=0.541, loss=85.983, backward_time=0.402, grad_norm=44.433, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.093, optim0_lr0=4.513e-04, train_time=1.308 +[gpub052:0/16] 2024-01-23 01:50:46,773 (trainer:737) INFO: 4epoch:train:11801-11900batch: iter_time=3.717e-04, forward_time=0.301, loss_ctc=102.803, loss_att=111.360, acc=0.515, loss=108.793, backward_time=0.408, grad_norm=55.076, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.095, optim0_lr0=4.528e-04, train_time=1.214 +[gpub052:0/16] 2024-01-23 01:53:23,494 (trainer:737) INFO: 4epoch:train:11901-12000batch: iter_time=8.558e-05, forward_time=0.345, loss_ctc=96.310, loss_att=93.305, acc=0.538, loss=94.207, backward_time=0.472, grad_norm=47.639, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.097, optim0_lr0=4.543e-04, train_time=1.567 +[gpub052:0/16] 2024-01-23 01:55:17,195 (trainer:737) INFO: 4epoch:train:12001-12100batch: iter_time=8.692e-05, forward_time=0.295, loss_ctc=103.209, loss_att=118.404, acc=0.525, loss=113.846, backward_time=0.408, grad_norm=49.674, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.094, optim0_lr0=4.558e-04, train_time=1.137 +[gpub052:0/16] 2024-01-23 01:57:53,294 (trainer:737) INFO: 
4epoch:train:12101-12200batch: iter_time=8.537e-05, forward_time=0.391, loss_ctc=80.801, loss_att=84.392, acc=0.559, loss=83.315, backward_time=0.440, grad_norm=40.552, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.106, optim0_lr0=4.573e-04, train_time=1.561 +[gpub052:0/16] 2024-01-23 01:59:58,767 (trainer:737) INFO: 4epoch:train:12201-12300batch: iter_time=9.027e-05, forward_time=0.295, loss_ctc=80.373, loss_att=88.191, acc=0.562, loss=85.845, backward_time=0.409, grad_norm=40.897, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.094, optim0_lr0=4.588e-04, train_time=1.254 +[gpub052:0/16] 2024-01-23 02:02:11,235 (trainer:737) INFO: 4epoch:train:12301-12400batch: iter_time=8.305e-05, forward_time=0.292, loss_ctc=97.569, loss_att=83.281, acc=0.561, loss=87.567, backward_time=0.404, grad_norm=49.784, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.094, optim0_lr0=4.603e-04, train_time=1.325 +[gpub052:0/16] 2024-01-23 02:04:23,365 (trainer:737) INFO: 4epoch:train:12401-12500batch: iter_time=2.886e-04, forward_time=0.337, loss_ctc=84.552, loss_att=81.919, acc=0.577, loss=82.709, backward_time=0.456, grad_norm=40.301, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.101, optim0_lr0=4.618e-04, train_time=1.321 +[gpub052:0/16] 2024-01-23 02:04:43,638 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub052:0/16] 2024-01-23 02:05:03,695 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 02:05:07,835 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 02:05:07,835 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub052:0/16] 2024-01-23 02:05:07,838 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 02:12:17,563 (trainer:737) INFO: 4epoch:train:12501-12600batch: iter_time=3.466, forward_time=0.333, loss_ctc=87.561, loss_att=103.233, acc=0.547, loss=98.531, backward_time=0.408, grad_norm=43.080, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.094, optim0_lr0=4.633e-04, train_time=4.741 +[gpub052:0/16] 2024-01-23 02:14:31,372 (trainer:737) INFO: 4epoch:train:12601-12700batch: iter_time=8.750e-05, forward_time=0.369, loss_ctc=90.580, loss_att=92.173, acc=0.560, loss=91.695, backward_time=0.505, grad_norm=49.192, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.104, optim0_lr0=4.648e-04, train_time=1.338 +[gpub052:0/16] 2024-01-23 02:17:10,620 (trainer:737) INFO: 4epoch:train:12701-12800batch: iter_time=9.257e-05, forward_time=0.347, loss_ctc=87.738, loss_att=97.826, acc=0.519, loss=94.800, backward_time=0.460, grad_norm=42.790, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.098, optim0_lr0=4.663e-04, train_time=1.592 +[gpub052:0/16] 2024-01-23 02:19:21,029 (trainer:737) 
INFO: 4epoch:train:12801-12900batch: iter_time=9.124e-05, forward_time=0.362, loss_ctc=88.650, loss_att=89.781, acc=0.572, loss=89.442, backward_time=0.456, grad_norm=45.283, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.098, optim0_lr0=4.678e-04, train_time=1.304 +[gpub052:0/16] 2024-01-23 02:21:24,409 (trainer:737) INFO: 4epoch:train:12901-13000batch: iter_time=9.069e-05, forward_time=0.333, loss_ctc=80.907, loss_att=79.333, acc=0.563, loss=79.805, backward_time=0.473, grad_norm=39.611, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.100, optim0_lr0=4.693e-04, train_time=1.234 +[gpub052:0/16] 2024-01-23 02:24:04,408 (trainer:737) INFO: 4epoch:train:13001-13100batch: iter_time=8.777e-05, forward_time=0.337, loss_ctc=108.535, loss_att=115.590, acc=0.518, loss=113.473, backward_time=0.432, grad_norm=58.530, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.096, optim0_lr0=4.708e-04, train_time=1.600 +[gpub052:0/16] 2024-01-23 02:26:18,104 (trainer:737) INFO: 4epoch:train:13101-13200batch: iter_time=3.476e-04, forward_time=0.413, loss_ctc=90.315, loss_att=84.062, acc=0.549, loss=85.938, backward_time=0.461, grad_norm=41.655, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.100, optim0_lr0=4.723e-04, train_time=1.336 +[gpub052:0/16] 2024-01-23 02:28:40,391 (trainer:737) INFO: 4epoch:train:13201-13300batch: iter_time=8.341e-04, forward_time=0.388, loss_ctc=87.435, loss_att=97.041, acc=0.535, loss=94.159, backward_time=0.447, grad_norm=42.335, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.097, optim0_lr0=4.738e-04, train_time=1.424 +[gpub052:0/16] 2024-01-23 02:31:09,298 (trainer:737) INFO: 4epoch:train:13301-13400batch: iter_time=9.005e-05, forward_time=0.334, loss_ctc=103.023, loss_att=110.382, acc=0.547, loss=108.174, backward_time=0.406, grad_norm=46.410, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.094, optim0_lr0=4.753e-04, train_time=1.487 +[gpub052:0/16] 2024-01-23 02:33:26,737 (trainer:737) INFO: 
4epoch:train:13401-13500batch: iter_time=3.277e-04, forward_time=0.396, loss_ctc=80.133, loss_att=87.581, acc=0.580, loss=85.347, backward_time=0.438, grad_norm=38.797, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.098, optim0_lr0=4.768e-04, train_time=1.375 +[gpub052:0/16] 2024-01-23 02:35:32,530 (trainer:737) INFO: 4epoch:train:13501-13600batch: iter_time=2.207e-04, forward_time=0.384, loss_ctc=83.451, loss_att=74.105, acc=0.577, loss=76.909, backward_time=0.434, grad_norm=42.986, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.101, optim0_lr0=4.783e-04, train_time=1.256 +[gpub052:0/16] 2024-01-23 02:38:15,564 (trainer:737) INFO: 4epoch:train:13601-13700batch: iter_time=4.028e-04, forward_time=0.364, loss_ctc=96.137, loss_att=86.721, acc=0.557, loss=89.546, backward_time=0.464, grad_norm=47.183, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.105, optim0_lr0=4.798e-04, train_time=1.632 +[gpub052:0/16] 2024-01-23 02:39:37,547 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub052:0/16] 2024-01-23 02:39:58,223 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 02:40:02,346 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 02:40:02,346 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub052:0/16] 2024-01-23 02:40:02,350 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 02:46:12,800 (trainer:737) INFO: 4epoch:train:13701-13800batch: iter_time=3.383, forward_time=0.450, loss_ctc=81.573, loss_att=91.915, acc=0.555, loss=88.812, backward_time=0.444, grad_norm=40.145, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.102, optim0_lr0=4.813e-04, train_time=4.771 +[gpub052:0/16] 2024-01-23 02:49:11,655 (trainer:737) INFO: 4epoch:train:13801-13900batch: iter_time=8.766e-05, forward_time=0.516, loss_ctc=91.227, loss_att=94.890, acc=0.566, loss=93.791, backward_time=0.478, grad_norm=46.014, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.113, optim0_lr0=4.828e-04, train_time=1.789 +[gpub052:0/16] 2024-01-23 02:51:59,203 (trainer:737) INFO: 4epoch:train:13901-14000batch: iter_time=3.368e-04, forward_time=0.452, loss_ctc=85.831, loss_att=101.515, acc=0.528, loss=96.809, backward_time=0.517, grad_norm=46.916, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.119, optim0_lr0=4.843e-04, train_time=1.675 +[gpub052:0/16] 2024-01-23 02:54:08,238 (trainer:737) 
INFO: 4epoch:train:14001-14100batch: iter_time=3.188e-04, forward_time=0.384, loss_ctc=79.802, loss_att=86.091, acc=0.539, loss=84.204, backward_time=0.443, grad_norm=40.048, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.106, optim0_lr0=4.858e-04, train_time=1.291 +[gpub052:0/16] 2024-01-23 02:56:52,007 (trainer:737) INFO: 4epoch:train:14101-14200batch: iter_time=8.703e-05, forward_time=0.352, loss_ctc=88.946, loss_att=82.017, acc=0.574, loss=84.096, backward_time=0.414, grad_norm=43.309, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.098, optim0_lr0=4.873e-04, train_time=1.637 +[gpub052:0/16] 2024-01-23 02:59:12,146 (trainer:737) INFO: 4epoch:train:14201-14300batch: iter_time=9.241e-04, forward_time=0.415, loss_ctc=85.470, loss_att=84.062, acc=0.535, loss=84.484, backward_time=0.487, grad_norm=44.762, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.114, optim0_lr0=4.888e-04, train_time=1.401 +[gpub052:0/16] 2024-01-23 03:01:17,668 (trainer:737) INFO: 4epoch:train:14301-14400batch: iter_time=8.424e-05, forward_time=0.366, loss_ctc=101.489, loss_att=105.491, acc=0.520, loss=104.290, backward_time=0.436, grad_norm=53.650, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.095, optim0_lr0=4.903e-04, train_time=1.255 +[gpub052:0/16] 2024-01-23 03:04:02,297 (trainer:737) INFO: 4epoch:train:14401-14500batch: iter_time=3.684e-04, forward_time=0.426, loss_ctc=93.228, loss_att=86.269, acc=0.549, loss=88.356, backward_time=0.445, grad_norm=44.661, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.099, optim0_lr0=4.918e-04, train_time=1.646 +[gpub052:0/16] 2024-01-23 03:06:27,507 (trainer:737) INFO: 4epoch:train:14501-14600batch: iter_time=6.367e-04, forward_time=0.426, loss_ctc=100.877, loss_att=113.739, acc=0.527, loss=109.880, backward_time=0.458, grad_norm=45.236, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.102, optim0_lr0=4.933e-04, train_time=1.452 +[gpub052:0/16] 2024-01-23 03:08:40,739 (trainer:737) INFO: 
4epoch:train:14601-14700batch: iter_time=3.091e-04, forward_time=0.360, loss_ctc=78.817, loss_att=82.480, acc=0.551, loss=81.381, backward_time=0.465, grad_norm=38.789, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.103, optim0_lr0=4.948e-04, train_time=1.333 +[gpub052:0/16] 2024-01-23 03:11:01,395 (trainer:737) INFO: 4epoch:train:14701-14800batch: iter_time=8.456e-05, forward_time=0.305, loss_ctc=77.619, loss_att=84.948, acc=0.566, loss=82.749, backward_time=0.415, grad_norm=38.742, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.095, optim0_lr0=4.963e-04, train_time=1.405 +[gpub052:0/16] 2024-01-23 03:13:25,432 (trainer:737) INFO: 4epoch:train:14801-14900batch: iter_time=3.351e-04, forward_time=0.401, loss_ctc=93.715, loss_att=79.288, acc=0.571, loss=83.616, backward_time=0.437, grad_norm=49.759, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.104, optim0_lr0=4.978e-04, train_time=1.441 +[gpub052:0/16] 2024-01-23 03:15:44,765 (trainer:737) INFO: 4epoch:train:14901-15000batch: iter_time=2.645e-04, forward_time=0.380, loss_ctc=82.589, loss_att=79.064, acc=0.584, loss=80.121, backward_time=0.437, grad_norm=40.893, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.105, optim0_lr0=4.993e-04, train_time=1.392 +[gpub052:0/16] 2024-01-23 03:53:45,497 (trainer:343) INFO: 4epoch results: [train] iter_time=0.405, forward_time=0.374, loss_ctc=95.570, loss_att=102.883, acc=0.513, loss=100.689, backward_time=0.441, grad_norm=48.726, clip=100.000, loss_scale=9.328e+12, optim_step_time=0.102, optim0_lr0=3.875e-04, train_time=2.057, time=8 hours, 34 minutes and 45.4 seconds, total_count=60000, gpu_max_cached_mem_GB=40.771, [valid] loss_ctc=91.410, cer_ctc=0.434, loss_att=79.362, acc=0.457, cer=0.417, wer=1.000, loss=82.976, time=37 minutes and 36.86 seconds, total_count=18684, gpu_max_cached_mem_GB=40.771 +[gpub052:0/16] 2024-01-23 03:53:54,925 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub052:0/16] 2024-01-23 
03:53:54,969 (trainer:272) INFO: 5/45epoch started. Estimated time to finish: 2 weeks, 4 days and 15 hours +[gpub052:0/16] 2024-01-23 03:53:54,979 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub052:0/16] 2024-01-23 03:54:13,627 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 03:54:17,014 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 03:54:17,014 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub052:0/16] 2024-01-23 03:54:17,017 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 04:00:52,948 (trainer:737) INFO: 5epoch:train:1-100batch: iter_time=2.965, forward_time=0.355, loss_ctc=106.744, loss_att=106.730, acc=0.531, loss=106.734, backward_time=0.414, grad_norm=57.903, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.096, optim0_lr0=4.998e-04, train_time=4.179 +[gpub052:0/16] 2024-01-23 04:03:12,073 (trainer:737) INFO: 5epoch:train:101-200batch: iter_time=4.598e-04, forward_time=0.397, loss_ctc=99.898, loss_att=104.771, acc=0.534, loss=103.309, backward_time=0.462, grad_norm=51.391, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.098, optim0_lr0=4.994e-04, train_time=1.392 +[gpub052:0/16] 2024-01-23 04:05:32,306 (trainer:737) INFO: 5epoch:train:201-300batch: iter_time=8.064e-05, forward_time=0.292, loss_ctc=84.953, loss_att=79.915, acc=0.557, 
loss=81.426, backward_time=0.401, grad_norm=48.799, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.095, optim0_lr0=4.990e-04, train_time=1.402 +[gpub052:0/16] 2024-01-23 04:07:49,275 (trainer:737) INFO: 5epoch:train:301-400batch: iter_time=8.155e-05, forward_time=0.414, loss_ctc=96.488, loss_att=99.226, acc=0.558, loss=98.405, backward_time=0.429, grad_norm=45.699, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.101, optim0_lr0=4.985e-04, train_time=1.370 +[gpub052:0/16] 2024-01-23 04:10:15,663 (trainer:737) INFO: 5epoch:train:401-500batch: iter_time=6.097e-04, forward_time=0.299, loss_ctc=88.470, loss_att=87.246, acc=0.565, loss=87.613, backward_time=0.406, grad_norm=43.560, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.098, optim0_lr0=4.981e-04, train_time=1.463 +[gpub052:0/16] 2024-01-23 04:12:27,056 (trainer:737) INFO: 5epoch:train:501-600batch: iter_time=8.321e-05, forward_time=0.375, loss_ctc=76.126, loss_att=80.102, acc=0.560, loss=78.909, backward_time=0.439, grad_norm=40.891, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.101, optim0_lr0=4.977e-04, train_time=1.314 +[gpub052:0/16] 2024-01-23 04:14:42,208 (trainer:737) INFO: 5epoch:train:601-700batch: iter_time=4.384e-04, forward_time=0.322, loss_ctc=91.302, loss_att=102.695, acc=0.550, loss=99.277, backward_time=0.413, grad_norm=42.201, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.096, optim0_lr0=4.973e-04, train_time=1.350 +[gpub052:0/16] 2024-01-23 04:17:05,706 (trainer:737) INFO: 5epoch:train:701-800batch: iter_time=8.076e-05, forward_time=0.388, loss_ctc=100.989, loss_att=92.581, acc=0.541, loss=95.104, backward_time=0.419, grad_norm=46.487, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.097, optim0_lr0=4.969e-04, train_time=1.436 +[gpub052:0/16] 2024-01-23 04:19:18,262 (trainer:737) INFO: 5epoch:train:801-900batch: iter_time=4.124e-04, forward_time=0.325, loss_ctc=88.392, loss_att=87.127, acc=0.581, loss=87.507, backward_time=0.432, grad_norm=39.209, 
clip=100.000, loss_scale=7.037e+13, optim_step_time=0.096, optim0_lr0=4.965e-04, train_time=1.325 +[gpub052:0/16] 2024-01-23 04:21:34,509 (trainer:737) INFO: 5epoch:train:901-1000batch: iter_time=8.386e-05, forward_time=0.365, loss_ctc=84.610, loss_att=90.723, acc=0.531, loss=88.889, backward_time=0.451, grad_norm=42.970, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.099, optim0_lr0=4.961e-04, train_time=1.362 +[gpub052:0/16] 2024-01-23 04:24:09,166 (trainer:737) INFO: 5epoch:train:1001-1100batch: iter_time=3.230e-04, forward_time=0.346, loss_ctc=88.873, loss_att=96.819, acc=0.543, loss=94.435, backward_time=0.474, grad_norm=43.160, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.098, optim0_lr0=4.957e-04, train_time=1.545 +[gpub052:0/16] 2024-01-23 04:26:12,153 (trainer:737) INFO: 5epoch:train:1101-1200batch: iter_time=7.936e-05, forward_time=0.294, loss_ctc=97.265, loss_att=103.452, acc=0.555, loss=101.596, backward_time=0.406, grad_norm=47.344, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.094, optim0_lr0=4.953e-04, train_time=1.231 +[gpub052:0/16] 2024-01-23 04:27:43,879 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub052:0/16] 2024-01-23 04:28:03,561 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 04:28:07,324 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 04:28:07,324 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub052:0/16] 2024-01-23 04:28:07,328 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 04:34:08,545 (trainer:737) INFO: 5epoch:train:1201-1300batch: iter_time=3.323, forward_time=0.392, loss_ctc=104.861, loss_att=93.717, acc=0.560, loss=97.060, backward_time=0.428, grad_norm=51.442, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.098, optim0_lr0=4.949e-04, train_time=4.764 +[gpub052:0/16] 2024-01-23 04:36:14,142 (trainer:737) INFO: 5epoch:train:1301-1400batch: iter_time=7.910e-05, forward_time=0.294, loss_ctc=99.983, loss_att=100.005, acc=0.551, loss=99.998, backward_time=0.408, grad_norm=45.852, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.094, optim0_lr0=4.945e-04, train_time=1.256 +[gpub052:0/16] 2024-01-23 04:38:28,657 (trainer:737) INFO: 5epoch:train:1401-1500batch: iter_time=7.874e-05, forward_time=0.357, loss_ctc=73.937, loss_att=74.137, acc=0.571, loss=74.077, backward_time=0.456, grad_norm=37.153, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.103, optim0_lr0=4.941e-04, train_time=1.345 +[gpub052:0/16] 2024-01-23 04:40:39,221 (trainer:737) INFO: 
5epoch:train:1501-1600batch: iter_time=7.831e-05, forward_time=0.295, loss_ctc=97.529, loss_att=93.171, acc=0.565, loss=94.478, backward_time=0.407, grad_norm=51.106, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.094, optim0_lr0=4.937e-04, train_time=1.306 +[gpub052:0/16] 2024-01-23 04:43:06,421 (trainer:737) INFO: 5epoch:train:1601-1700batch: iter_time=2.622e-04, forward_time=0.436, loss_ctc=87.552, loss_att=91.521, acc=0.584, loss=90.330, backward_time=0.435, grad_norm=40.140, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.107, optim0_lr0=4.933e-04, train_time=1.472 +[gpub052:0/16] 2024-01-23 04:45:09,170 (trainer:737) INFO: 5epoch:train:1701-1800batch: iter_time=7.955e-05, forward_time=0.289, loss_ctc=74.869, loss_att=78.914, acc=0.564, loss=77.701, backward_time=0.402, grad_norm=34.971, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.094, optim0_lr0=4.929e-04, train_time=1.227 +[gpub052:0/16] 2024-01-23 04:47:18,144 (trainer:737) INFO: 5epoch:train:1801-1900batch: iter_time=2.106e-04, forward_time=0.294, loss_ctc=80.179, loss_att=77.458, acc=0.584, loss=78.275, backward_time=0.406, grad_norm=39.389, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.095, optim0_lr0=4.925e-04, train_time=1.290 +[gpub052:0/16] 2024-01-23 04:49:27,130 (trainer:737) INFO: 5epoch:train:1901-2000batch: iter_time=8.808e-05, forward_time=0.395, loss_ctc=103.888, loss_att=107.047, acc=0.530, loss=106.100, backward_time=0.440, grad_norm=46.325, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.098, optim0_lr0=4.921e-04, train_time=1.289 +[gpub052:0/16] 2024-01-23 04:51:34,965 (trainer:737) INFO: 5epoch:train:2001-2100batch: iter_time=8.284e-05, forward_time=0.292, loss_ctc=83.518, loss_att=88.103, acc=0.580, loss=86.727, backward_time=0.405, grad_norm=37.456, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.094, optim0_lr0=4.917e-04, train_time=1.278 +[gpub052:0/16] 2024-01-23 04:53:55,439 (trainer:737) INFO: 5epoch:train:2101-2200batch: 
iter_time=8.278e-05, forward_time=0.347, loss_ctc=84.912, loss_att=79.054, acc=0.594, loss=80.812, backward_time=0.450, grad_norm=37.137, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.099, optim0_lr0=4.913e-04, train_time=1.405 +[gpub052:0/16] 2024-01-23 04:55:54,485 (trainer:737) INFO: 5epoch:train:2201-2300batch: iter_time=8.121e-05, forward_time=0.290, loss_ctc=77.555, loss_att=86.071, acc=0.528, loss=83.516, backward_time=0.402, grad_norm=39.969, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.094, optim0_lr0=4.909e-04, train_time=1.189 +[gpub052:0/16] 2024-01-23 04:58:02,441 (trainer:737) INFO: 5epoch:train:2301-2400batch: iter_time=8.540e-05, forward_time=0.299, loss_ctc=87.343, loss_att=103.261, acc=0.568, loss=98.485, backward_time=0.410, grad_norm=38.107, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.094, optim0_lr0=4.905e-04, train_time=1.279 +[gpub052:0/16] 2024-01-23 05:00:32,257 (trainer:737) INFO: 5epoch:train:2401-2500batch: iter_time=9.105e-05, forward_time=0.390, loss_ctc=101.952, loss_att=91.430, acc=0.567, loss=94.587, backward_time=0.440, grad_norm=48.432, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.099, optim0_lr0=4.901e-04, train_time=1.499 +[gpub052:0/16] 2024-01-23 05:00:52,408 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub052:0/16] 2024-01-23 05:01:12,159 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 05:01:15,990 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 05:01:15,990 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub052:0/16] 2024-01-23 05:01:15,993 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 05:07:52,519 (trainer:737) INFO: 5epoch:train:2501-2600batch: iter_time=3.160, forward_time=0.298, loss_ctc=97.833, loss_att=94.359, acc=0.557, loss=95.401, backward_time=0.408, grad_norm=47.020, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.095, optim0_lr0=4.897e-04, train_time=4.402 +[gpub052:0/16] 2024-01-23 05:10:09,389 (trainer:737) INFO: 5epoch:train:2601-2700batch: iter_time=9.440e-05, forward_time=0.415, loss_ctc=92.719, loss_att=96.603, acc=0.552, loss=95.438, backward_time=0.432, grad_norm=43.740, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.097, optim0_lr0=4.893e-04, train_time=1.368 +[gpub052:0/16] 2024-01-23 05:12:35,116 (trainer:737) INFO: 5epoch:train:2701-2800batch: iter_time=9.032e-05, forward_time=0.327, loss_ctc=79.102, loss_att=72.986, acc=0.586, loss=74.821, backward_time=0.454, grad_norm=41.362, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.099, optim0_lr0=4.889e-04, train_time=1.457 +[gpub052:0/16] 2024-01-23 05:14:36,163 (trainer:737) INFO: 
5epoch:train:2801-2900batch: iter_time=8.446e-05, forward_time=0.312, loss_ctc=89.596, loss_att=91.159, acc=0.578, loss=90.690, backward_time=0.416, grad_norm=37.582, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.095, optim0_lr0=4.885e-04, train_time=1.211 +[gpub052:0/16] 2024-01-23 05:17:01,843 (trainer:737) INFO: 5epoch:train:2901-3000batch: iter_time=8.938e-05, forward_time=0.382, loss_ctc=83.350, loss_att=80.628, acc=0.583, loss=81.445, backward_time=0.457, grad_norm=38.646, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.103, optim0_lr0=4.881e-04, train_time=1.456 +[gpub052:0/16] 2024-01-23 05:19:01,621 (trainer:737) INFO: 5epoch:train:3001-3100batch: iter_time=9.013e-05, forward_time=0.292, loss_ctc=71.722, loss_att=76.641, acc=0.568, loss=75.165, backward_time=0.403, grad_norm=35.349, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.094, optim0_lr0=4.878e-04, train_time=1.197 +[gpub052:0/16] 2024-01-23 05:21:34,187 (trainer:737) INFO: 5epoch:train:3101-3200batch: iter_time=9.474e-05, forward_time=0.377, loss_ctc=85.303, loss_att=95.628, acc=0.565, loss=92.530, backward_time=0.439, grad_norm=39.764, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.101, optim0_lr0=4.874e-04, train_time=1.525 +[gpub052:0/16] 2024-01-23 05:23:39,101 (trainer:737) INFO: 5epoch:train:3201-3300batch: iter_time=7.913e-05, forward_time=0.291, loss_ctc=92.585, loss_att=85.120, acc=0.550, loss=87.359, backward_time=0.404, grad_norm=43.958, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.094, optim0_lr0=4.870e-04, train_time=1.249 +[gpub052:0/16] 2024-01-23 05:26:06,440 (trainer:737) INFO: 5epoch:train:3301-3400batch: iter_time=2.604e-04, forward_time=0.417, loss_ctc=84.116, loss_att=82.639, acc=0.592, loss=83.082, backward_time=0.439, grad_norm=37.032, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.102, optim0_lr0=4.866e-04, train_time=1.473 +[gpub052:0/16] 2024-01-23 05:28:18,721 (trainer:737) INFO: 5epoch:train:3401-3500batch: iter_time=8.664e-05, 
forward_time=0.292, loss_ctc=78.270, loss_att=85.644, acc=0.542, loss=83.432, backward_time=0.404, grad_norm=37.920, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.094, optim0_lr0=4.862e-04, train_time=1.322 +[gpub052:0/16] 2024-01-23 05:30:44,919 (trainer:737) INFO: 5epoch:train:3501-3600batch: iter_time=3.651e-04, forward_time=0.351, loss_ctc=83.851, loss_att=92.462, acc=0.553, loss=89.879, backward_time=0.467, grad_norm=40.505, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.100, optim0_lr0=4.858e-04, train_time=1.462 +[gpub052:0/16] 2024-01-23 05:32:43,250 (trainer:737) INFO: 5epoch:train:3601-3700batch: iter_time=8.349e-05, forward_time=0.301, loss_ctc=90.125, loss_att=97.798, acc=0.562, loss=95.496, backward_time=0.409, grad_norm=41.843, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.094, optim0_lr0=4.854e-04, train_time=1.183 +[gpub052:0/16] 2024-01-23 05:34:17,154 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub052:0/16] 2024-01-23 05:34:36,941 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 05:34:40,922 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 05:34:40,922 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub052:0/16] 2024-01-23 05:34:40,925 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 05:40:27,476 (trainer:737) INFO: 
5epoch:train:3701-3800batch: iter_time=3.227, forward_time=0.385, loss_ctc=97.425, loss_att=84.934, acc=0.575, loss=88.681, backward_time=0.450, grad_norm=49.073, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.100, optim0_lr0=4.851e-04, train_time=4.642 +[gpub052:0/16] 2024-01-23 05:42:31,343 (trainer:737) INFO: 5epoch:train:3801-3900batch: iter_time=8.202e-05, forward_time=0.340, loss_ctc=96.973, loss_att=94.464, acc=0.561, loss=95.217, backward_time=0.440, grad_norm=46.986, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.102, optim0_lr0=4.847e-04, train_time=1.239 +[gpub052:0/16] 2024-01-23 05:44:46,560 (trainer:737) INFO: 5epoch:train:3901-4000batch: iter_time=7.774e-05, forward_time=0.312, loss_ctc=71.086, loss_att=70.956, acc=0.583, loss=70.995, backward_time=0.409, grad_norm=36.306, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.095, optim0_lr0=4.843e-04, train_time=1.351 +[gpub052:0/16] 2024-01-23 05:47:17,080 (trainer:737) INFO: 5epoch:train:4001-4100batch: iter_time=4.628e-04, forward_time=0.396, loss_ctc=93.520, loss_att=87.278, acc=0.583, loss=89.151, backward_time=0.439, grad_norm=42.964, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.099, optim0_lr0=4.839e-04, train_time=1.505 +[gpub052:0/16] 2024-01-23 05:49:29,142 (trainer:737) INFO: 5epoch:train:4101-4200batch: iter_time=7.986e-05, forward_time=0.293, loss_ctc=83.909, loss_att=87.440, acc=0.593, loss=86.380, backward_time=0.407, grad_norm=38.368, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.094, optim0_lr0=4.836e-04, train_time=1.320 +[gpub052:0/16] 2024-01-23 05:51:47,780 (trainer:737) INFO: 5epoch:train:4201-4300batch: iter_time=8.569e-05, forward_time=0.357, loss_ctc=71.685, loss_att=74.896, acc=0.579, loss=73.932, backward_time=0.474, grad_norm=33.118, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.100, optim0_lr0=4.832e-04, train_time=1.387 +[gpub052:0/16] 2024-01-23 05:54:07,249 (trainer:737) INFO: 5epoch:train:4301-4400batch: iter_time=8.543e-05, 
forward_time=0.290, loss_ctc=75.976, loss_att=74.111, acc=0.591, loss=74.670, backward_time=0.401, grad_norm=35.145, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.094, optim0_lr0=4.828e-04, train_time=1.394 +[gpub052:0/16] 2024-01-23 05:56:31,944 (trainer:737) INFO: 5epoch:train:4401-4500batch: iter_time=6.322e-04, forward_time=0.416, loss_ctc=99.352, loss_att=102.343, acc=0.532, loss=101.446, backward_time=0.454, grad_norm=44.548, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=4.824e-04, train_time=1.448 +[gpub052:0/16] 2024-01-23 05:58:46,661 (trainer:737) INFO: 5epoch:train:4501-4600batch: iter_time=8.911e-05, forward_time=0.301, loss_ctc=79.740, loss_att=83.103, acc=0.592, loss=82.094, backward_time=0.404, grad_norm=58.053, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.094, optim0_lr0=4.821e-04, train_time=1.347 +[gpub052:0/16] 2024-01-23 06:01:29,484 (trainer:737) INFO: 5epoch:train:4601-4700batch: iter_time=4.116e-04, forward_time=0.355, loss_ctc=81.712, loss_att=75.875, acc=0.599, loss=77.626, backward_time=0.471, grad_norm=36.768, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.098, optim0_lr0=4.817e-04, train_time=1.627 +[gpub052:0/16] 2024-01-23 06:03:36,283 (trainer:737) INFO: 5epoch:train:4701-4800batch: iter_time=3.136e-04, forward_time=0.358, loss_ctc=73.497, loss_att=83.665, acc=0.533, loss=80.615, backward_time=0.435, grad_norm=37.328, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.098, optim0_lr0=4.813e-04, train_time=1.268 +[gpub052:0/16] 2024-01-23 06:05:36,104 (trainer:737) INFO: 5epoch:train:4801-4900batch: iter_time=8.620e-05, forward_time=0.347, loss_ctc=83.966, loss_att=98.320, acc=0.580, loss=94.014, backward_time=0.422, grad_norm=36.589, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.096, optim0_lr0=4.809e-04, train_time=1.197 +[gpub052:0/16] 2024-01-23 06:08:12,992 (trainer:737) INFO: 5epoch:train:4901-5000batch: iter_time=3.281e-04, forward_time=0.400, loss_ctc=96.116, 
loss_att=86.782, acc=0.568, loss=89.582, backward_time=0.446, grad_norm=46.436, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.098, optim0_lr0=4.806e-04, train_time=1.569 +[gpub052:0/16] 2024-01-23 06:08:33,044 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub052:0/16] 2024-01-23 06:08:53,017 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 06:08:56,685 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 06:08:56,685 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub052:0/16] 2024-01-23 06:08:56,688 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 06:15:12,951 (trainer:737) INFO: 5epoch:train:5001-5100batch: iter_time=3.069, forward_time=0.306, loss_ctc=93.376, loss_att=92.165, acc=0.575, loss=92.528, backward_time=0.410, grad_norm=43.936, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.096, optim0_lr0=4.802e-04, train_time=4.200 +[gpub052:0/16] 2024-01-23 06:17:56,264 (trainer:737) INFO: 5epoch:train:5101-5200batch: iter_time=8.352e-05, forward_time=0.354, loss_ctc=87.314, loss_att=90.910, acc=0.582, loss=89.831, backward_time=0.466, grad_norm=40.795, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.101, optim0_lr0=4.798e-04, train_time=1.632 +[gpub052:0/16] 2024-01-23 06:19:48,441 (trainer:737) INFO: 5epoch:train:5201-5300batch: 
iter_time=7.975e-05, forward_time=0.289, loss_ctc=74.984, loss_att=69.837, acc=0.602, loss=71.381, backward_time=0.402, grad_norm=38.621, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.094, optim0_lr0=4.795e-04, train_time=1.120 +[gpub052:0/16] 2024-01-23 06:21:58,692 (trainer:737) INFO: 5epoch:train:5301-5400batch: iter_time=8.610e-05, forward_time=0.379, loss_ctc=86.173, loss_att=87.456, acc=0.597, loss=87.071, backward_time=0.436, grad_norm=35.809, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=4.791e-04, train_time=1.303 +[gpub052:0/16] 2024-01-23 06:24:30,470 (trainer:737) INFO: 5epoch:train:5401-5500batch: iter_time=8.153e-05, forward_time=0.292, loss_ctc=80.083, loss_att=78.165, acc=0.601, loss=78.741, backward_time=0.403, grad_norm=37.432, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.094, optim0_lr0=4.787e-04, train_time=1.516 +[gpub052:0/16] 2024-01-23 06:26:36,920 (trainer:737) INFO: 5epoch:train:5501-5600batch: iter_time=8.387e-05, forward_time=0.374, loss_ctc=68.924, loss_att=72.272, acc=0.592, loss=71.268, backward_time=0.458, grad_norm=34.084, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.101, optim0_lr0=4.784e-04, train_time=1.265 +[gpub052:0/16] 2024-01-23 06:28:36,582 (trainer:737) INFO: 5epoch:train:5601-5700batch: iter_time=8.490e-05, forward_time=0.301, loss_ctc=81.915, loss_att=93.046, acc=0.587, loss=89.707, backward_time=0.432, grad_norm=35.692, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.096, optim0_lr0=4.780e-04, train_time=1.197 +[gpub052:0/16] 2024-01-23 06:31:17,806 (trainer:737) INFO: 5epoch:train:5701-5800batch: iter_time=0.011, forward_time=0.372, loss_ctc=91.011, loss_att=82.664, acc=0.575, loss=85.168, backward_time=0.422, grad_norm=42.096, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.098, optim0_lr0=4.776e-04, train_time=1.612 +[gpub052:0/16] 2024-01-23 06:33:26,847 (trainer:737) INFO: 5epoch:train:5801-5900batch: iter_time=3.111e-04, forward_time=0.403, 
loss_ctc=79.631, loss_att=78.329, acc=0.615, loss=78.719, backward_time=0.444, grad_norm=32.524, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.100, optim0_lr0=4.773e-04, train_time=1.291 +[gpub052:0/16] 2024-01-23 06:36:00,234 (trainer:737) INFO: 5epoch:train:5901-6000batch: iter_time=0.024, forward_time=0.307, loss_ctc=75.729, loss_att=81.714, acc=0.568, loss=79.919, backward_time=0.409, grad_norm=35.740, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.095, optim0_lr0=4.769e-04, train_time=1.528 +[gpub052:0/16] 2024-01-23 06:39:09,455 (trainer:737) INFO: 5epoch:train:6001-6100batch: iter_time=3.455e-04, forward_time=0.398, loss_ctc=80.926, loss_att=89.046, acc=0.576, loss=86.610, backward_time=0.445, grad_norm=38.169, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.102, optim0_lr0=4.765e-04, train_time=1.897 +[gpub052:0/16] 2024-01-23 06:41:36,970 (trainer:737) INFO: 5epoch:train:6101-6200batch: iter_time=7.852e-05, forward_time=0.293, loss_ctc=86.412, loss_att=93.364, acc=0.585, loss=91.278, backward_time=0.407, grad_norm=39.896, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.094, optim0_lr0=4.762e-04, train_time=1.474 +[gpub052:0/16] 2024-01-23 06:42:56,960 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub052:0/16] 2024-01-23 06:43:26,057 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 06:43:30,951 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 06:43:30,951 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub052:0/16] 2024-01-23 06:43:31,073 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 06:49:19,343 (trainer:737) INFO: 5epoch:train:6201-6300batch: iter_time=3.357, forward_time=0.393, loss_ctc=93.571, loss_att=80.850, acc=0.596, loss=84.666, backward_time=0.418, grad_norm=45.823, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.099, optim0_lr0=4.758e-04, train_time=4.624 +[gpub052:0/16] 2024-01-23 06:52:08,493 (trainer:737) INFO: 5epoch:train:6301-6400batch: iter_time=0.088, forward_time=0.298, loss_ctc=92.719, loss_att=91.761, acc=0.575, loss=92.049, backward_time=0.424, grad_norm=39.921, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.094, optim0_lr0=4.755e-04, train_time=1.691 +[gpub052:0/16] 2024-01-23 06:54:32,569 (trainer:737) INFO: 5epoch:train:6401-6500batch: iter_time=8.059e-05, forward_time=0.352, loss_ctc=67.920, loss_att=67.494, acc=0.602, loss=67.621, backward_time=0.442, grad_norm=35.620, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.104, optim0_lr0=4.751e-04, train_time=1.441 +[gpub052:0/16] 2024-01-23 06:56:31,462 (trainer:737) INFO: 
5epoch:train:6501-6600batch: iter_time=8.558e-05, forward_time=0.302, loss_ctc=89.740, loss_att=83.567, acc=0.597, loss=85.419, backward_time=0.419, grad_norm=39.745, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.095, optim0_lr0=4.748e-04, train_time=1.189 +[gpub052:0/16] 2024-01-23 06:58:55,010 (trainer:737) INFO: 5epoch:train:6601-6700batch: iter_time=8.289e-05, forward_time=0.420, loss_ctc=81.990, loss_att=85.452, acc=0.605, loss=84.413, backward_time=0.428, grad_norm=36.021, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.098, optim0_lr0=4.744e-04, train_time=1.435 +[gpub052:0/16] 2024-01-23 07:01:10,347 (trainer:737) INFO: 5epoch:train:6701-6800batch: iter_time=8.102e-05, forward_time=0.319, loss_ctc=68.825, loss_att=72.456, acc=0.593, loss=71.366, backward_time=0.417, grad_norm=31.143, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.099, optim0_lr0=4.740e-04, train_time=1.353 +[gpub052:0/16] 2024-01-23 07:03:20,013 (trainer:737) INFO: 5epoch:train:6801-6900batch: iter_time=8.461e-05, forward_time=0.337, loss_ctc=73.103, loss_att=71.436, acc=0.605, loss=71.936, backward_time=0.420, grad_norm=33.478, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.099, optim0_lr0=4.737e-04, train_time=1.297 +[gpub052:0/16] 2024-01-23 07:05:41,503 (trainer:737) INFO: 5epoch:train:6901-7000batch: iter_time=3.433e-04, forward_time=0.381, loss_ctc=96.311, loss_att=98.349, acc=0.546, loss=97.738, backward_time=0.466, grad_norm=45.446, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.104, optim0_lr0=4.733e-04, train_time=1.414 +[gpub052:0/16] 2024-01-23 07:07:34,886 (trainer:737) INFO: 5epoch:train:7001-7100batch: iter_time=7.818e-05, forward_time=0.292, loss_ctc=77.170, loss_att=80.023, acc=0.606, loss=79.167, backward_time=0.406, grad_norm=33.365, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.094, optim0_lr0=4.730e-04, train_time=1.134 +[gpub052:0/16] 2024-01-23 07:10:20,998 (trainer:737) INFO: 5epoch:train:7101-7200batch: iter_time=2.101e-04, 
forward_time=0.435, loss_ctc=76.807, loss_att=71.793, acc=0.614, loss=73.297, backward_time=0.435, grad_norm=32.774, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.099, optim0_lr0=4.726e-04, train_time=1.660 +[gpub052:0/16] 2024-01-23 07:12:18,247 (trainer:737) INFO: 5epoch:train:7201-7300batch: iter_time=8.269e-05, forward_time=0.295, loss_ctc=71.524, loss_att=81.163, acc=0.544, loss=78.271, backward_time=0.408, grad_norm=37.672, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.094, optim0_lr0=4.723e-04, train_time=1.173 +[gpub052:0/16] 2024-01-23 07:14:44,291 (trainer:737) INFO: 5epoch:train:7301-7400batch: iter_time=3.227e-04, forward_time=0.365, loss_ctc=80.686, loss_att=95.293, acc=0.593, loss=90.911, backward_time=0.463, grad_norm=34.596, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.100, optim0_lr0=4.719e-04, train_time=1.460 +[gpub052:0/16] 2024-01-23 07:17:19,232 (trainer:737) INFO: 5epoch:train:7401-7500batch: iter_time=6.368e-04, forward_time=0.320, loss_ctc=93.036, loss_att=83.548, acc=0.582, loss=86.395, backward_time=0.447, grad_norm=46.985, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.101, optim0_lr0=4.716e-04, train_time=1.549 +[gpub052:0/16] 2024-01-23 07:17:39,383 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub052:0/16] 2024-01-23 07:17:59,368 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 07:18:02,993 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 07:18:02,993 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub052:0/16] 2024-01-23 07:18:02,996 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 07:24:26,636 (trainer:737) INFO: 5epoch:train:7501-7600batch: iter_time=3.046, forward_time=0.363, loss_ctc=90.899, loss_att=84.257, acc=0.593, loss=86.250, backward_time=0.416, grad_norm=42.488, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.095, optim0_lr0=4.712e-04, train_time=4.274 +[gpub052:0/16] 2024-01-23 07:27:03,349 (trainer:737) INFO: 5epoch:train:7601-7700batch: iter_time=7.722e-05, forward_time=0.364, loss_ctc=85.804, loss_att=85.981, acc=0.585, loss=85.928, backward_time=0.439, grad_norm=40.944, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.102, optim0_lr0=4.709e-04, train_time=1.567 +[gpub052:0/16] 2024-01-23 07:29:09,820 (trainer:737) INFO: 5epoch:train:7701-7800batch: iter_time=8.180e-05, forward_time=0.302, loss_ctc=72.096, loss_att=66.116, acc=0.617, loss=67.910, backward_time=0.405, grad_norm=36.144, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.096, optim0_lr0=4.705e-04, train_time=1.265 +[gpub052:0/16] 2024-01-23 07:31:20,450 (trainer:737) INFO: 
5epoch:train:7801-7900batch: iter_time=8.227e-05, forward_time=0.361, loss_ctc=83.177, loss_att=83.413, acc=0.603, loss=83.342, backward_time=0.430, grad_norm=33.576, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.103, optim0_lr0=4.702e-04, train_time=1.306 +[gpub052:0/16] 2024-01-23 07:33:45,636 (trainer:737) INFO: 5epoch:train:7901-8000batch: iter_time=8.133e-05, forward_time=0.324, loss_ctc=77.196, loss_att=74.433, acc=0.612, loss=75.262, backward_time=0.433, grad_norm=37.338, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.105, optim0_lr0=4.698e-04, train_time=1.452 +[gpub052:0/16] 2024-01-23 07:35:49,354 (trainer:737) INFO: 5epoch:train:8001-8100batch: iter_time=1.809e-04, forward_time=0.372, loss_ctc=66.446, loss_att=69.413, acc=0.600, loss=68.522, backward_time=0.427, grad_norm=31.278, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.098, optim0_lr0=4.695e-04, train_time=1.237 +[gpub052:0/16] 2024-01-23 07:38:14,673 (trainer:737) INFO: 5epoch:train:8101-8200batch: iter_time=0.001, forward_time=0.370, loss_ctc=80.014, loss_att=89.537, acc=0.591, loss=86.680, backward_time=0.438, grad_norm=35.043, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.102, optim0_lr0=4.691e-04, train_time=1.452 +[gpub052:0/16] 2024-01-23 07:40:29,821 (trainer:737) INFO: 5epoch:train:8201-8300batch: iter_time=7.982e-05, forward_time=0.299, loss_ctc=86.869, loss_att=78.067, acc=0.577, loss=80.708, backward_time=0.412, grad_norm=40.715, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.095, optim0_lr0=4.688e-04, train_time=1.352 +[gpub052:0/16] 2024-01-23 07:42:46,350 (trainer:737) INFO: 5epoch:train:8301-8400batch: iter_time=9.146e-04, forward_time=0.343, loss_ctc=77.078, loss_att=75.045, acc=0.621, loss=75.655, backward_time=0.451, grad_norm=31.857, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.103, optim0_lr0=4.685e-04, train_time=1.363 +[gpub052:0/16] 2024-01-23 07:45:04,075 (trainer:737) INFO: 5epoch:train:8401-8500batch: iter_time=6.256e-04, 
forward_time=0.361, loss_ctc=73.274, loss_att=79.089, acc=0.567, loss=77.345, backward_time=0.444, grad_norm=34.757, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.101, optim0_lr0=4.681e-04, train_time=1.379 +[gpub052:0/16] 2024-01-23 07:47:19,964 (trainer:737) INFO: 5epoch:train:8501-8600batch: iter_time=8.712e-05, forward_time=0.328, loss_ctc=77.557, loss_att=85.212, acc=0.581, loss=82.916, backward_time=0.408, grad_norm=35.465, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.094, optim0_lr0=4.678e-04, train_time=1.359 +[gpub052:0/16] 2024-01-23 07:49:37,001 (trainer:737) INFO: 5epoch:train:8601-8700batch: iter_time=3.591e-04, forward_time=0.419, loss_ctc=83.521, loss_att=90.217, acc=0.592, loss=88.208, backward_time=0.430, grad_norm=38.781, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.098, optim0_lr0=4.674e-04, train_time=1.370 +[gpub052:0/16] 2024-01-23 07:50:53,987 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub052:0/16] 2024-01-23 07:51:13,822 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 07:51:17,440 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 07:51:17,440 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub052:0/16] 2024-01-23 07:51:17,444 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 07:57:01,720 (trainer:737) INFO: 
5epoch:train:8701-8800batch: iter_time=3.220, forward_time=0.379, loss_ctc=90.590, loss_att=81.689, acc=0.598, loss=84.359, backward_time=0.421, grad_norm=44.803, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.096, optim0_lr0=4.671e-04, train_time=4.447 +[gpub052:0/16] 2024-01-23 07:59:24,821 (trainer:737) INFO: 5epoch:train:8801-8900batch: iter_time=8.556e-05, forward_time=0.301, loss_ctc=88.075, loss_att=89.885, acc=0.593, loss=89.342, backward_time=0.408, grad_norm=38.798, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.094, optim0_lr0=4.668e-04, train_time=1.430 +[gpub052:0/16] 2024-01-23 08:01:31,865 (trainer:737) INFO: 5epoch:train:8901-9000batch: iter_time=2.146e-04, forward_time=0.404, loss_ctc=65.595, loss_att=66.405, acc=0.615, loss=66.162, backward_time=0.433, grad_norm=32.105, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.101, optim0_lr0=4.664e-04, train_time=1.271 +[gpub052:0/16] 2024-01-23 08:03:52,922 (trainer:737) INFO: 5epoch:train:9001-9100batch: iter_time=8.982e-05, forward_time=0.378, loss_ctc=86.706, loss_att=81.070, acc=0.606, loss=82.761, backward_time=0.434, grad_norm=36.607, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.096, optim0_lr0=4.661e-04, train_time=1.410 +[gpub052:0/16] 2024-01-23 08:06:04,060 (trainer:737) INFO: 5epoch:train:9101-9200batch: iter_time=8.821e-05, forward_time=0.314, loss_ctc=78.720, loss_att=83.516, acc=0.620, loss=82.077, backward_time=0.409, grad_norm=34.387, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.094, optim0_lr0=4.657e-04, train_time=1.311 +[gpub052:0/16] 2024-01-23 08:08:18,769 (trainer:737) INFO: 5epoch:train:9201-9300batch: iter_time=8.435e-05, forward_time=0.391, loss_ctc=67.189, loss_att=71.053, acc=0.602, loss=69.894, backward_time=0.447, grad_norm=29.687, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.100, optim0_lr0=4.654e-04, train_time=1.347 +[gpub052:0/16] 2024-01-23 08:10:27,570 (trainer:737) INFO: 5epoch:train:9301-9400batch: iter_time=8.222e-05, 
forward_time=0.291, loss_ctc=71.062, loss_att=69.789, acc=0.620, loss=70.171, backward_time=0.404, grad_norm=33.006, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.094, optim0_lr0=4.651e-04, train_time=1.288 +[gpub052:0/16] 2024-01-23 08:12:50,960 (trainer:737) INFO: 5epoch:train:9401-9500batch: iter_time=8.587e-05, forward_time=0.377, loss_ctc=92.788, loss_att=94.867, acc=0.571, loss=94.243, backward_time=0.453, grad_norm=40.694, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.101, optim0_lr0=4.647e-04, train_time=1.433 +[gpub052:0/16] 2024-01-23 08:15:11,441 (trainer:737) INFO: 5epoch:train:9501-9600batch: iter_time=2.943e-04, forward_time=0.358, loss_ctc=74.554, loss_att=78.805, acc=0.620, loss=77.530, backward_time=0.494, grad_norm=31.380, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.103, optim0_lr0=4.644e-04, train_time=1.405 +[gpub052:0/16] 2024-01-23 08:17:22,264 (trainer:737) INFO: 5epoch:train:9601-9700batch: iter_time=8.292e-05, forward_time=0.293, loss_ctc=76.318, loss_att=70.728, acc=0.633, loss=72.405, backward_time=0.405, grad_norm=33.211, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.094, optim0_lr0=4.641e-04, train_time=1.306 +[gpub052:0/16] 2024-01-23 08:19:51,161 (trainer:737) INFO: 5epoch:train:9701-9800batch: iter_time=5.867e-04, forward_time=0.452, loss_ctc=68.401, loss_att=77.856, acc=0.566, loss=75.020, backward_time=0.433, grad_norm=34.564, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.101, optim0_lr0=4.637e-04, train_time=1.488 +[gpub052:0/16] 2024-01-23 08:21:56,733 (trainer:737) INFO: 5epoch:train:9801-9900batch: iter_time=7.717e-05, forward_time=0.298, loss_ctc=78.483, loss_att=92.105, acc=0.609, loss=88.019, backward_time=0.411, grad_norm=34.239, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.095, optim0_lr0=4.634e-04, train_time=1.257 +[gpub052:0/16] 2024-01-23 08:24:22,866 (trainer:737) INFO: 5epoch:train:9901-10000batch: iter_time=4.393e-04, forward_time=0.369, loss_ctc=90.220, 
loss_att=79.646, acc=0.609, loss=82.818, backward_time=0.450, grad_norm=45.077, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.103, optim0_lr0=4.631e-04, train_time=1.461 +[gpub052:0/16] 2024-01-23 08:24:42,941 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub052:0/16] 2024-01-23 08:25:02,962 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 08:25:06,710 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 08:25:06,710 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub052:0/16] 2024-01-23 08:25:06,713 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 08:31:43,030 (trainer:737) INFO: 5epoch:train:10001-10100batch: iter_time=3.122, forward_time=0.303, loss_ctc=86.586, loss_att=84.892, acc=0.601, loss=85.400, backward_time=0.411, grad_norm=40.086, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.095, optim0_lr0=4.627e-04, train_time=4.402 +[gpub052:0/16] 2024-01-23 08:34:05,586 (trainer:737) INFO: 5epoch:train:10101-10200batch: iter_time=8.291e-05, forward_time=0.416, loss_ctc=82.084, loss_att=84.192, acc=0.605, loss=83.559, backward_time=0.446, grad_norm=35.926, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.100, optim0_lr0=4.624e-04, train_time=1.424 +[gpub052:0/16] 2024-01-23 08:36:37,308 (trainer:737) INFO: 5epoch:train:10201-10300batch: 
iter_time=7.804e-05, forward_time=0.377, loss_ctc=70.281, loss_att=64.853, acc=0.628, loss=66.481, backward_time=0.440, grad_norm=35.534, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.100, optim0_lr0=4.621e-04, train_time=1.518 +[gpub052:0/16] 2024-01-23 08:38:43,811 (trainer:737) INFO: 5epoch:train:10301-10400batch: iter_time=8.136e-05, forward_time=0.292, loss_ctc=80.526, loss_att=81.156, acc=0.618, loss=80.967, backward_time=0.405, grad_norm=33.394, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.095, optim0_lr0=4.618e-04, train_time=1.265 +[gpub052:0/16] 2024-01-23 08:40:57,219 (trainer:737) INFO: 5epoch:train:10401-10500batch: iter_time=8.217e-05, forward_time=0.426, loss_ctc=74.965, loss_att=72.634, acc=0.625, loss=73.333, backward_time=0.431, grad_norm=32.016, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.100, optim0_lr0=4.614e-04, train_time=1.335 +[gpub052:0/16] 2024-01-23 08:43:21,070 (trainer:737) INFO: 5epoch:train:10501-10600batch: iter_time=8.170e-05, forward_time=0.355, loss_ctc=64.080, loss_att=67.434, acc=0.614, loss=66.428, backward_time=0.436, grad_norm=30.736, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.102, optim0_lr0=4.611e-04, train_time=1.438 +[gpub052:0/16] 2024-01-23 08:45:23,497 (trainer:737) INFO: 5epoch:train:10601-10700batch: iter_time=8.260e-05, forward_time=0.293, loss_ctc=76.769, loss_att=86.366, acc=0.610, loss=83.487, backward_time=0.406, grad_norm=33.563, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.094, optim0_lr0=4.608e-04, train_time=1.223 +[gpub052:0/16] 2024-01-23 08:47:46,527 (trainer:737) INFO: 5epoch:train:10701-10800batch: iter_time=9.050e-05, forward_time=0.356, loss_ctc=84.132, loss_att=76.272, acc=0.596, loss=78.630, backward_time=0.478, grad_norm=38.252, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.101, optim0_lr0=4.604e-04, train_time=1.431 +[gpub052:0/16] 2024-01-23 08:50:02,124 (trainer:737) INFO: 5epoch:train:10801-10900batch: iter_time=4.308e-04, 
forward_time=0.376, loss_ctc=75.197, loss_att=73.617, acc=0.635, loss=74.091, backward_time=0.431, grad_norm=30.295, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.097, optim0_lr0=4.601e-04, train_time=1.356 +[gpub052:0/16] 2024-01-23 08:52:11,048 (trainer:737) INFO: 5epoch:train:10901-11000batch: iter_time=8.502e-05, forward_time=0.301, loss_ctc=70.912, loss_att=77.926, acc=0.584, loss=75.821, backward_time=0.406, grad_norm=31.793, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.094, optim0_lr0=4.598e-04, train_time=1.289 +[gpub052:0/16] 2024-01-23 08:54:31,383 (trainer:737) INFO: 5epoch:train:11001-11100batch: iter_time=4.389e-04, forward_time=0.360, loss_ctc=75.770, loss_att=82.262, acc=0.600, loss=80.314, backward_time=0.469, grad_norm=33.766, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.100, optim0_lr0=4.595e-04, train_time=1.402 +[gpub052:0/16] 2024-01-23 08:56:41,768 (trainer:737) INFO: 5epoch:train:11101-11200batch: iter_time=3.552e-04, forward_time=0.333, loss_ctc=80.473, loss_att=86.314, acc=0.613, loss=84.561, backward_time=0.443, grad_norm=35.410, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.101, optim0_lr0=4.591e-04, train_time=1.304 +[gpub052:0/16] 2024-01-23 08:58:10,809 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub052:0/16] 2024-01-23 08:58:31,395 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 08:58:35,020 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 08:58:35,020 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub052:0/16] 2024-01-23 08:58:35,024 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 09:04:16,121 (trainer:737) INFO: 5epoch:train:11201-11300batch: iter_time=3.193, forward_time=0.325, loss_ctc=86.457, loss_att=74.735, acc=0.619, loss=78.251, backward_time=0.409, grad_norm=42.844, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.095, optim0_lr0=4.588e-04, train_time=4.543 +[gpub052:0/16] 2024-01-23 09:06:25,051 (trainer:737) INFO: 5epoch:train:11301-11400batch: iter_time=8.010e-05, forward_time=0.409, loss_ctc=86.957, loss_att=85.719, acc=0.600, loss=86.090, backward_time=0.442, grad_norm=38.637, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.101, optim0_lr0=4.585e-04, train_time=1.289 +[gpub052:0/16] 2024-01-23 09:08:55,755 (trainer:737) INFO: 5epoch:train:11401-11500batch: iter_time=7.842e-05, forward_time=0.321, loss_ctc=63.465, loss_att=64.509, acc=0.621, loss=64.195, backward_time=0.401, grad_norm=31.306, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.095, optim0_lr0=4.582e-04, train_time=1.507 +[gpub052:0/16] 2024-01-23 09:11:19,340 
(trainer:737) INFO: 5epoch:train:11501-11600batch: iter_time=8.704e-05, forward_time=0.359, loss_ctc=83.778, loss_att=79.049, acc=0.617, loss=80.467, backward_time=0.456, grad_norm=34.360, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.103, optim0_lr0=4.579e-04, train_time=1.435 +[gpub052:0/16] 2024-01-23 09:13:29,938 (trainer:737) INFO: 5epoch:train:11601-11700batch: iter_time=1.421e-04, forward_time=0.408, loss_ctc=75.846, loss_att=80.129, acc=0.625, loss=78.844, backward_time=0.452, grad_norm=31.589, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.100, optim0_lr0=4.575e-04, train_time=1.306 +[gpub052:0/16] 2024-01-23 09:16:04,936 (trainer:737) INFO: 5epoch:train:11701-11800batch: iter_time=8.383e-05, forward_time=0.289, loss_ctc=65.059, loss_att=68.748, acc=0.610, loss=67.641, backward_time=0.402, grad_norm=30.012, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.094, optim0_lr0=4.572e-04, train_time=1.550 +[gpub052:0/16] 2024-01-23 09:18:14,240 (trainer:737) INFO: 5epoch:train:11801-11900batch: iter_time=8.130e-05, forward_time=0.397, loss_ctc=68.709, loss_att=66.830, acc=0.626, loss=67.394, backward_time=0.451, grad_norm=30.021, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.101, optim0_lr0=4.569e-04, train_time=1.293 +[gpub052:0/16] 2024-01-23 09:20:37,527 (trainer:737) INFO: 5epoch:train:11901-12000batch: iter_time=1.632e-04, forward_time=0.340, loss_ctc=90.258, loss_att=91.701, acc=0.568, loss=91.268, backward_time=0.438, grad_norm=40.371, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.101, optim0_lr0=4.566e-04, train_time=1.433 +[gpub052:0/16] 2024-01-23 09:23:10,505 (trainer:737) INFO: 5epoch:train:12001-12100batch: iter_time=8.018e-05, forward_time=0.303, loss_ctc=72.228, loss_att=75.153, acc=0.625, loss=74.275, backward_time=0.407, grad_norm=30.182, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.095, optim0_lr0=4.563e-04, train_time=1.529 +[gpub052:0/16] 2024-01-23 09:25:37,639 (trainer:737) INFO: 
5epoch:train:12101-12200batch: iter_time=2.064e-04, forward_time=0.408, loss_ctc=72.448, loss_att=68.266, acc=0.632, loss=69.521, backward_time=0.442, grad_norm=30.291, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.099, optim0_lr0=4.560e-04, train_time=1.471 +[gpub052:0/16] 2024-01-23 09:28:24,664 (trainer:737) INFO: 5epoch:train:12201-12300batch: iter_time=6.093e-04, forward_time=0.378, loss_ctc=66.426, loss_att=75.387, acc=0.567, loss=72.699, backward_time=0.439, grad_norm=33.160, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.107, optim0_lr0=4.556e-04, train_time=1.671 +[gpub052:0/16] 2024-01-23 09:30:47,608 (trainer:737) INFO: 5epoch:train:12301-12400batch: iter_time=8.328e-05, forward_time=0.298, loss_ctc=76.258, loss_att=90.395, acc=0.612, loss=86.154, backward_time=0.408, grad_norm=32.828, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.095, optim0_lr0=4.553e-04, train_time=1.428 +[gpub052:0/16] 2024-01-23 09:33:19,597 (trainer:737) INFO: 5epoch:train:12401-12500batch: iter_time=2.816e-04, forward_time=0.381, loss_ctc=87.831, loss_att=78.232, acc=0.603, loss=81.112, backward_time=0.464, grad_norm=42.840, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.104, optim0_lr0=4.550e-04, train_time=1.521 +[gpub052:0/16] 2024-01-23 09:33:39,808 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub052:0/16] 2024-01-23 09:33:59,915 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 09:34:03,564 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 09:34:03,564 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub052:0/16] 2024-01-23 09:34:03,567 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 09:41:07,504 (trainer:737) INFO: 5epoch:train:12501-12600batch: iter_time=3.348, forward_time=0.411, loss_ctc=84.717, loss_att=82.850, acc=0.611, loss=83.410, backward_time=0.432, grad_norm=38.247, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.097, optim0_lr0=4.547e-04, train_time=4.679 +[gpub052:0/16] 2024-01-23 09:43:16,095 (trainer:737) INFO: 5epoch:train:12601-12700batch: iter_time=7.946e-05, forward_time=0.421, loss_ctc=79.973, loss_att=82.361, acc=0.614, loss=81.645, backward_time=0.441, grad_norm=36.424, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.099, optim0_lr0=4.544e-04, train_time=1.285 +[gpub052:0/16] 2024-01-23 09:46:03,672 (trainer:737) INFO: 5epoch:train:12701-12800batch: iter_time=7.964e-05, forward_time=0.312, loss_ctc=67.957, loss_att=62.618, acc=0.639, loss=64.219, backward_time=0.466, grad_norm=32.667, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.106, optim0_lr0=4.541e-04, train_time=1.676 +[gpub052:0/16] 2024-01-23 09:48:05,914 (trainer:737) 
INFO: 5epoch:train:12801-12900batch: iter_time=8.120e-05, forward_time=0.349, loss_ctc=77.885, loss_att=78.926, acc=0.627, loss=78.614, backward_time=0.425, grad_norm=32.033, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.095, optim0_lr0=4.538e-04, train_time=1.222 +[gpub052:0/16] 2024-01-23 09:50:49,994 (trainer:737) INFO: 5epoch:train:12901-13000batch: iter_time=8.204e-05, forward_time=0.382, loss_ctc=73.003, loss_att=71.156, acc=0.634, loss=71.710, backward_time=0.431, grad_norm=30.776, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.103, optim0_lr0=4.534e-04, train_time=1.641 +[gpub052:0/16] 2024-01-23 09:53:14,001 (trainer:737) INFO: 5epoch:train:13001-13100batch: iter_time=3.605e-04, forward_time=0.365, loss_ctc=63.157, loss_att=66.094, acc=0.625, loss=65.213, backward_time=0.427, grad_norm=28.569, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.103, optim0_lr0=4.531e-04, train_time=1.439 +[gpub052:0/16] 2024-01-23 09:55:26,891 (trainer:737) INFO: 5epoch:train:13101-13200batch: iter_time=8.433e-05, forward_time=0.353, loss_ctc=75.055, loss_att=84.244, acc=0.618, loss=81.487, backward_time=0.453, grad_norm=33.090, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.099, optim0_lr0=4.528e-04, train_time=1.329 +[gpub052:0/16] 2024-01-23 09:57:58,946 (trainer:737) INFO: 5epoch:train:13201-13300batch: iter_time=2.369e-04, forward_time=0.338, loss_ctc=82.131, loss_att=74.007, acc=0.609, loss=76.444, backward_time=0.412, grad_norm=36.402, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.095, optim0_lr0=4.525e-04, train_time=1.519 +[gpub052:0/16] 2024-01-23 10:00:26,664 (trainer:737) INFO: 5epoch:train:13301-13400batch: iter_time=1.551e-04, forward_time=0.387, loss_ctc=73.574, loss_att=71.736, acc=0.644, loss=72.287, backward_time=0.448, grad_norm=29.910, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.104, optim0_lr0=4.522e-04, train_time=1.478 +[gpub052:0/16] 2024-01-23 10:02:47,518 (trainer:737) INFO: 5epoch:train:13401-13500batch: 
iter_time=8.094e-05, forward_time=0.327, loss_ctc=69.300, loss_att=74.646, acc=0.599, loss=73.042, backward_time=0.416, grad_norm=32.527, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.102, optim0_lr0=4.519e-04, train_time=1.408 +[gpub052:0/16] 2024-01-23 10:05:04,181 (trainer:737) INFO: 5epoch:train:13501-13600batch: iter_time=3.380e-04, forward_time=0.351, loss_ctc=73.119, loss_att=81.091, acc=0.606, loss=78.700, backward_time=0.413, grad_norm=31.889, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.096, optim0_lr0=4.516e-04, train_time=1.367 +[gpub052:0/16] 2024-01-23 10:07:32,888 (trainer:737) INFO: 5epoch:train:13601-13700batch: iter_time=3.370e-04, forward_time=0.363, loss_ctc=79.025, loss_att=85.857, acc=0.616, loss=83.807, backward_time=0.458, grad_norm=34.719, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.103, optim0_lr0=4.513e-04, train_time=1.486 +[gpub052:0/16] 2024-01-23 10:09:05,830 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub052:0/16] 2024-01-23 10:09:25,819 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 10:09:29,452 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 10:09:29,452 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub052:0/16] 2024-01-23 10:09:29,455 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 10:15:59,484 
(trainer:737) INFO: 5epoch:train:13701-13800batch: iter_time=3.610, forward_time=0.425, loss_ctc=84.938, loss_att=73.829, acc=0.630, loss=77.162, backward_time=0.424, grad_norm=39.841, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.097, optim0_lr0=4.510e-04, train_time=5.067 +[gpub052:0/16] 2024-01-23 10:18:31,296 (trainer:737) INFO: 5epoch:train:13801-13900batch: iter_time=6.173e-04, forward_time=0.383, loss_ctc=83.328, loss_att=82.234, acc=0.612, loss=82.562, backward_time=0.433, grad_norm=37.008, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.120, optim0_lr0=4.507e-04, train_time=1.517 +[gpub052:0/16] 2024-01-23 10:21:00,612 (trainer:737) INFO: 5epoch:train:13901-14000batch: iter_time=8.988e-05, forward_time=0.288, loss_ctc=61.805, loss_att=61.709, acc=0.630, loss=61.737, backward_time=0.397, grad_norm=29.814, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.094, optim0_lr0=4.504e-04, train_time=1.493 +[gpub052:0/16] 2024-01-23 10:23:15,056 (trainer:737) INFO: 5epoch:train:14001-14100batch: iter_time=3.399e-04, forward_time=0.413, loss_ctc=81.824, loss_att=76.614, acc=0.627, loss=78.177, backward_time=0.464, grad_norm=37.390, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.104, optim0_lr0=4.501e-04, train_time=1.344 +[gpub052:0/16] 2024-01-23 10:25:28,665 (trainer:737) INFO: 5epoch:train:14101-14200batch: iter_time=8.990e-05, forward_time=0.292, loss_ctc=74.262, loss_att=77.859, acc=0.633, loss=76.780, backward_time=0.404, grad_norm=33.186, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.094, optim0_lr0=4.498e-04, train_time=1.336 +[gpub052:0/16] 2024-01-23 10:28:04,789 (trainer:737) INFO: 5epoch:train:14201-14300batch: iter_time=3.222e-04, forward_time=0.421, loss_ctc=63.732, loss_att=67.414, acc=0.618, loss=66.310, backward_time=0.434, grad_norm=30.245, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.101, optim0_lr0=4.495e-04, train_time=1.561 +[gpub052:0/16] 2024-01-23 10:29:58,870 (trainer:737) INFO: 
5epoch:train:14301-14400batch: iter_time=8.785e-05, forward_time=0.289, loss_ctc=67.677, loss_att=65.865, acc=0.633, loss=66.409, backward_time=0.401, grad_norm=30.152, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.094, optim0_lr0=4.492e-04, train_time=1.141 +[gpub052:0/16] 2024-01-23 10:33:03,352 (trainer:737) INFO: 5epoch:train:14401-14500batch: iter_time=0.012, forward_time=0.416, loss_ctc=87.809, loss_att=89.655, acc=0.577, loss=89.101, backward_time=0.440, grad_norm=40.881, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.102, optim0_lr0=4.489e-04, train_time=1.843 +[gpub052:0/16] 2024-01-23 10:35:46,585 (trainer:737) INFO: 5epoch:train:14501-14600batch: iter_time=0.243, forward_time=0.291, loss_ctc=71.167, loss_att=73.524, acc=0.634, loss=72.817, backward_time=0.404, grad_norm=30.044, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.094, optim0_lr0=4.486e-04, train_time=1.633 +[gpub052:0/16] 2024-01-23 10:38:31,174 (trainer:737) INFO: 5epoch:train:14601-14700batch: iter_time=5.400e-04, forward_time=0.405, loss_ctc=71.314, loss_att=66.214, acc=0.640, loss=67.744, backward_time=0.434, grad_norm=30.709, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.111, optim0_lr0=4.483e-04, train_time=1.644 +[gpub052:0/16] 2024-01-23 10:40:27,309 (trainer:737) INFO: 5epoch:train:14701-14800batch: iter_time=8.545e-05, forward_time=0.289, loss_ctc=65.315, loss_att=73.893, acc=0.576, loss=71.320, backward_time=0.402, grad_norm=32.782, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.094, optim0_lr0=4.480e-04, train_time=1.162 +[gpub052:0/16] 2024-01-23 10:43:09,541 (trainer:737) INFO: 5epoch:train:14801-14900batch: iter_time=3.771e-04, forward_time=0.347, loss_ctc=74.192, loss_att=87.851, acc=0.621, loss=83.753, backward_time=0.455, grad_norm=31.965, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.106, optim0_lr0=4.477e-04, train_time=1.622 +[gpub052:0/16] 2024-01-23 10:45:16,704 (trainer:737) INFO: 5epoch:train:14901-15000batch: 
iter_time=1.709e-04, forward_time=0.294, loss_ctc=85.922, loss_att=76.012, acc=0.613, loss=78.985, backward_time=0.413, grad_norm=40.735, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.099, optim0_lr0=4.474e-04, train_time=1.271 +[gpub052:0/16] 2024-01-23 11:23:38,707 (trainer:343) INFO: 5epoch results: [train] iter_time=0.260, forward_time=0.348, loss_ctc=81.453, loss_att=82.310, acc=0.591, loss=82.053, backward_time=0.429, grad_norm=37.860, clip=100.000, loss_scale=1.792e+15, optim_step_time=0.099, optim0_lr0=4.721e-04, train_time=1.645, time=6 hours, 51 minutes and 47.95 seconds, total_count=75000, gpu_max_cached_mem_GB=41.549, [valid] loss_ctc=69.561, cer_ctc=0.353, loss_att=63.706, acc=0.509, cer=0.425, wer=1.000, loss=65.462, time=37 minutes and 55.51 seconds, total_count=23355, gpu_max_cached_mem_GB=41.549 +[gpub052:0/16] 2024-01-23 11:23:48,676 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub052:0/16] 2024-01-23 11:23:48,739 (trainer:272) INFO: 6/45epoch started. Estimated time to finish: 2 weeks, 2 days and 6 hours +[gpub052:0/16] 2024-01-23 11:23:48,751 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub052:0/16] 2024-01-23 11:24:07,998 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 11:24:11,778 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 11:24:11,778 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub052:0/16] 2024-01-23 11:24:11,781 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 11:31:07,961 (trainer:737) INFO: 6epoch:train:1-100batch: iter_time=3.166, forward_time=0.364, loss_ctc=89.308, loss_att=74.913, acc=0.593, loss=79.231, backward_time=0.414, grad_norm=38.709, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.097, optim0_lr0=4.471e-04, train_time=4.392 +[gpub052:0/16] 2024-01-23 11:33:08,290 (trainer:737) INFO: 6epoch:train:101-200batch: iter_time=8.120e-05, forward_time=0.347, loss_ctc=77.557, loss_att=77.988, acc=0.608, loss=77.859, backward_time=0.426, grad_norm=34.140, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.101, optim0_lr0=4.468e-04, train_time=1.203 +[gpub052:0/16] 2024-01-23 11:35:39,414 (trainer:737) INFO: 6epoch:train:201-300batch: iter_time=0.001, forward_time=0.343, loss_ctc=83.236, loss_att=84.132, acc=0.615, loss=83.863, backward_time=0.412, grad_norm=34.136, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.096, optim0_lr0=4.465e-04, train_time=1.511 +[gpub052:0/16] 2024-01-23 11:37:48,995 (trainer:737) INFO: 
6epoch:train:301-400batch: iter_time=7.862e-05, forward_time=0.344, loss_ctc=74.854, loss_att=78.426, acc=0.622, loss=77.354, backward_time=0.434, grad_norm=33.212, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.097, optim0_lr0=4.462e-04, train_time=1.296 +[gpub052:0/16] 2024-01-23 11:40:14,668 (trainer:737) INFO: 6epoch:train:401-500batch: iter_time=3.923e-04, forward_time=0.318, loss_ctc=71.467, loss_att=80.238, acc=0.591, loss=77.607, backward_time=0.432, grad_norm=32.032, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.099, optim0_lr0=4.459e-04, train_time=1.457 +[gpub052:0/16] 2024-01-23 11:42:34,116 (trainer:737) INFO: 6epoch:train:501-600batch: iter_time=8.265e-05, forward_time=0.332, loss_ctc=73.054, loss_att=64.508, acc=0.636, loss=67.072, backward_time=0.418, grad_norm=31.622, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.096, optim0_lr0=4.456e-04, train_time=1.394 +[gpub052:0/16] 2024-01-23 11:45:00,314 (trainer:737) INFO: 6epoch:train:601-700batch: iter_time=3.430e-04, forward_time=0.353, loss_ctc=83.488, loss_att=74.309, acc=0.632, loss=77.062, backward_time=0.435, grad_norm=34.223, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.098, optim0_lr0=4.453e-04, train_time=1.461 +[gpub052:0/16] 2024-01-23 11:47:32,385 (trainer:737) INFO: 6epoch:train:701-800batch: iter_time=9.259e-05, forward_time=0.349, loss_ctc=85.708, loss_att=89.651, acc=0.586, loss=88.468, backward_time=0.458, grad_norm=40.589, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.102, optim0_lr0=4.450e-04, train_time=1.521 +[gpub052:0/16] 2024-01-23 11:49:56,503 (trainer:737) INFO: 6epoch:train:801-900batch: iter_time=1.985e-04, forward_time=0.319, loss_ctc=72.399, loss_att=72.762, acc=0.632, loss=72.653, backward_time=0.416, grad_norm=30.176, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.097, optim0_lr0=4.447e-04, train_time=1.440 +[gpub052:0/16] 2024-01-23 11:52:18,288 (trainer:737) INFO: 6epoch:train:901-1000batch: iter_time=8.643e-05, 
forward_time=0.379, loss_ctc=84.878, loss_att=93.890, acc=0.584, loss=91.186, backward_time=0.462, grad_norm=39.236, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.100, optim0_lr0=4.444e-04, train_time=1.418 +[gpub052:0/16] 2024-01-23 11:54:47,075 (trainer:737) INFO: 6epoch:train:1001-1100batch: iter_time=3.179e-04, forward_time=0.315, loss_ctc=69.183, loss_att=68.629, acc=0.622, loss=68.795, backward_time=0.402, grad_norm=32.877, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.094, optim0_lr0=4.441e-04, train_time=1.488 +[gpub052:0/16] 2024-01-23 11:57:21,742 (trainer:737) INFO: 6epoch:train:1101-1200batch: iter_time=8.261e-05, forward_time=0.327, loss_ctc=69.025, loss_att=70.537, acc=0.644, loss=70.083, backward_time=0.411, grad_norm=31.048, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.097, optim0_lr0=4.438e-04, train_time=1.546 +[gpub052:0/16] 2024-01-23 11:58:51,882 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub052:0/16] 2024-01-23 11:59:11,567 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 11:59:15,192 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 11:59:15,192 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub052:0/16] 2024-01-23 11:59:15,196 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 12:04:57,337 (trainer:737) INFO: 
6epoch:train:1201-1300batch: iter_time=3.294, forward_time=0.346, loss_ctc=90.490, loss_att=77.747, acc=0.606, loss=81.570, backward_time=0.448, grad_norm=38.273, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.100, optim0_lr0=4.435e-04, train_time=4.556 +[gpub052:0/16] 2024-01-23 12:07:18,768 (trainer:737) INFO: 6epoch:train:1301-1400batch: iter_time=8.306e-05, forward_time=0.363, loss_ctc=74.923, loss_att=74.172, acc=0.629, loss=74.397, backward_time=0.425, grad_norm=32.469, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.103, optim0_lr0=4.432e-04, train_time=1.414 +[gpub052:0/16] 2024-01-23 12:09:36,265 (trainer:737) INFO: 6epoch:train:1401-1500batch: iter_time=5.670e-04, forward_time=0.297, loss_ctc=74.784, loss_att=84.146, acc=0.622, loss=81.337, backward_time=0.412, grad_norm=31.725, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.095, optim0_lr0=4.429e-04, train_time=1.375 +[gpub052:0/16] 2024-01-23 12:11:44,939 (trainer:737) INFO: 6epoch:train:1501-1600batch: iter_time=8.861e-05, forward_time=0.407, loss_ctc=82.837, loss_att=83.562, acc=0.611, loss=83.345, backward_time=0.439, grad_norm=35.538, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.102, optim0_lr0=4.427e-04, train_time=1.287 +[gpub052:0/16] 2024-01-23 12:13:40,508 (trainer:737) INFO: 6epoch:train:1601-1700batch: iter_time=8.413e-05, forward_time=0.301, loss_ctc=75.647, loss_att=86.484, acc=0.612, loss=83.233, backward_time=0.418, grad_norm=30.989, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.096, optim0_lr0=4.424e-04, train_time=1.155 +[gpub052:0/16] 2024-01-23 12:16:22,383 (trainer:737) INFO: 6epoch:train:1701-1800batch: iter_time=2.805e-04, forward_time=0.371, loss_ctc=70.461, loss_att=64.177, acc=0.636, loss=66.062, backward_time=0.435, grad_norm=31.487, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.100, optim0_lr0=4.421e-04, train_time=1.618 +[gpub052:0/16] 2024-01-23 12:18:24,953 (trainer:737) INFO: 6epoch:train:1801-1900batch: iter_time=2.318e-04, 
forward_time=0.311, loss_ctc=66.065, loss_att=71.664, acc=0.638, loss=69.984, backward_time=0.416, grad_norm=28.716, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.104, optim0_lr0=4.418e-04, train_time=1.226 +[gpub052:0/16] 2024-01-23 12:20:30,554 (trainer:737) INFO: 6epoch:train:1901-2000batch: iter_time=4.996e-04, forward_time=0.393, loss_ctc=82.914, loss_att=86.459, acc=0.603, loss=85.396, backward_time=0.436, grad_norm=34.385, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.105, optim0_lr0=4.415e-04, train_time=1.255 +[gpub052:0/16] 2024-01-23 12:22:52,706 (trainer:737) INFO: 6epoch:train:2001-2100batch: iter_time=1.532e-04, forward_time=0.317, loss_ctc=78.308, loss_att=78.901, acc=0.627, loss=78.723, backward_time=0.420, grad_norm=32.478, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.099, optim0_lr0=4.412e-04, train_time=1.422 +[gpub052:0/16] 2024-01-23 12:25:12,812 (trainer:737) INFO: 6epoch:train:2101-2200batch: iter_time=6.757e-04, forward_time=0.361, loss_ctc=78.147, loss_att=72.958, acc=0.638, loss=74.514, backward_time=0.424, grad_norm=31.643, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.099, optim0_lr0=4.409e-04, train_time=1.401 +[gpub052:0/16] 2024-01-23 12:27:23,734 (trainer:737) INFO: 6epoch:train:2201-2300batch: iter_time=8.506e-05, forward_time=0.402, loss_ctc=72.921, loss_att=87.915, acc=0.623, loss=83.416, backward_time=0.442, grad_norm=31.595, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.102, optim0_lr0=4.406e-04, train_time=1.309 +[gpub052:0/16] 2024-01-23 12:29:21,245 (trainer:737) INFO: 6epoch:train:2301-2400batch: iter_time=8.279e-04, forward_time=0.307, loss_ctc=70.392, loss_att=73.128, acc=0.633, loss=72.307, backward_time=0.411, grad_norm=32.070, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.095, optim0_lr0=4.404e-04, train_time=1.174 +[gpub052:0/16] 2024-01-23 12:31:48,871 (trainer:737) INFO: 6epoch:train:2401-2500batch: iter_time=8.083e-05, forward_time=0.369, loss_ctc=72.358, 
loss_att=71.375, acc=0.639, loss=71.670, backward_time=0.437, grad_norm=33.512, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.107, optim0_lr0=4.401e-04, train_time=1.477 +[gpub052:0/16] 2024-01-23 12:32:09,006 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub052:0/16] 2024-01-23 12:32:28,670 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 12:32:32,256 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 12:32:32,256 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub052:0/16] 2024-01-23 12:32:32,260 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 12:39:19,018 (trainer:737) INFO: 6epoch:train:2501-2600batch: iter_time=3.283, forward_time=0.309, loss_ctc=85.651, loss_att=73.972, acc=0.600, loss=77.476, backward_time=0.409, grad_norm=34.759, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.098, optim0_lr0=4.398e-04, train_time=4.501 +[gpub052:0/16] 2024-01-23 12:41:45,709 (trainer:737) INFO: 6epoch:train:2601-2700batch: iter_time=8.686e-05, forward_time=0.385, loss_ctc=74.407, loss_att=76.343, acc=0.618, loss=75.762, backward_time=0.448, grad_norm=34.737, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.105, optim0_lr0=4.395e-04, train_time=1.466 +[gpub052:0/16] 2024-01-23 12:44:04,866 (trainer:737) INFO: 6epoch:train:2701-2800batch: 
iter_time=8.324e-05, forward_time=0.301, loss_ctc=78.697, loss_att=80.045, acc=0.626, loss=79.641, backward_time=0.420, grad_norm=34.295, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.096, optim0_lr0=4.392e-04, train_time=1.391 +[gpub052:0/16] 2024-01-23 12:46:10,470 (trainer:737) INFO: 6epoch:train:2801-2900batch: iter_time=1.313e-04, forward_time=0.373, loss_ctc=71.904, loss_att=76.015, acc=0.633, loss=74.781, backward_time=0.435, grad_norm=34.199, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.102, optim0_lr0=4.389e-04, train_time=1.256 +[gpub052:0/16] 2024-01-23 12:48:12,745 (trainer:737) INFO: 6epoch:train:2901-3000batch: iter_time=8.459e-05, forward_time=0.329, loss_ctc=67.978, loss_att=76.260, acc=0.607, loss=73.775, backward_time=0.428, grad_norm=31.698, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.104, optim0_lr0=4.387e-04, train_time=1.222 +[gpub052:0/16] 2024-01-23 12:50:46,948 (trainer:737) INFO: 6epoch:train:3001-3100batch: iter_time=7.288e-04, forward_time=0.357, loss_ctc=69.380, loss_att=62.412, acc=0.646, loss=64.502, backward_time=0.421, grad_norm=29.108, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.097, optim0_lr0=4.384e-04, train_time=1.543 +[gpub052:0/16] 2024-01-23 12:53:10,435 (trainer:737) INFO: 6epoch:train:3101-3200batch: iter_time=7.843e-05, forward_time=0.394, loss_ctc=80.123, loss_att=71.462, acc=0.643, loss=74.060, backward_time=0.425, grad_norm=32.792, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.106, optim0_lr0=4.381e-04, train_time=1.434 +[gpub052:0/16] 2024-01-23 12:55:05,135 (trainer:737) INFO: 6epoch:train:3201-3300batch: iter_time=4.889e-04, forward_time=0.296, loss_ctc=82.135, loss_att=88.158, acc=0.594, loss=86.351, backward_time=0.422, grad_norm=37.601, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.095, optim0_lr0=4.378e-04, train_time=1.147 +[gpub052:0/16] 2024-01-23 12:57:45,279 (trainer:737) INFO: 6epoch:train:3301-3400batch: iter_time=3.515e-04, forward_time=0.382, 
loss_ctc=69.524, loss_att=70.414, acc=0.640, loss=70.147, backward_time=0.442, grad_norm=29.129, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.107, optim0_lr0=4.375e-04, train_time=1.601 +[gpub052:0/16] 2024-01-23 12:59:53,882 (trainer:737) INFO: 6epoch:train:3401-3500batch: iter_time=8.593e-05, forward_time=0.295, loss_ctc=78.800, loss_att=89.742, acc=0.598, loss=86.459, backward_time=0.408, grad_norm=34.583, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.095, optim0_lr0=4.373e-04, train_time=1.286 +[gpub052:0/16] 2024-01-23 13:02:03,515 (trainer:737) INFO: 6epoch:train:3501-3600batch: iter_time=2.752e-04, forward_time=0.419, loss_ctc=65.337, loss_att=66.429, acc=0.632, loss=66.101, backward_time=0.444, grad_norm=31.934, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.101, optim0_lr0=4.370e-04, train_time=1.296 +[gpub052:0/16] 2024-01-23 13:04:03,423 (trainer:737) INFO: 6epoch:train:3601-3700batch: iter_time=1.053e-04, forward_time=0.303, loss_ctc=66.423, loss_att=68.346, acc=0.654, loss=67.769, backward_time=0.414, grad_norm=30.214, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.096, optim0_lr0=4.367e-04, train_time=1.199 +[gpub052:0/16] 2024-01-23 13:05:57,530 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub052:0/16] 2024-01-23 13:06:17,404 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 13:06:21,012 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 13:06:21,012 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub052:0/16] 2024-01-23 13:06:21,015 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 13:12:03,245 (trainer:737) INFO: 6epoch:train:3701-3800batch: iter_time=3.267, forward_time=0.370, loss_ctc=86.041, loss_att=72.762, acc=0.619, loss=76.746, backward_time=0.428, grad_norm=34.337, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.100, optim0_lr0=4.364e-04, train_time=4.798 +[gpub052:0/16] 2024-01-23 13:14:07,983 (trainer:737) INFO: 6epoch:train:3801-3900batch: iter_time=3.007e-04, forward_time=0.353, loss_ctc=73.429, loss_att=67.026, acc=0.633, loss=68.947, backward_time=0.421, grad_norm=34.839, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.104, optim0_lr0=4.362e-04, train_time=1.247 +[gpub052:0/16] 2024-01-23 13:16:48,700 (trainer:737) INFO: 6epoch:train:3901-4000batch: iter_time=1.916e-04, forward_time=0.345, loss_ctc=72.153, loss_att=76.387, acc=0.628, loss=75.117, backward_time=0.421, grad_norm=29.834, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.096, optim0_lr0=4.359e-04, train_time=1.607 +[gpub052:0/16] 2024-01-23 13:19:02,645 (trainer:737) INFO: 
6epoch:train:4001-4100batch: iter_time=8.136e-05, forward_time=0.385, loss_ctc=80.059, loss_att=80.394, acc=0.608, loss=80.293, backward_time=0.440, grad_norm=36.584, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.109, optim0_lr0=4.356e-04, train_time=1.339 +[gpub052:0/16] 2024-01-23 13:21:23,965 (trainer:737) INFO: 6epoch:train:4101-4200batch: iter_time=8.866e-05, forward_time=0.322, loss_ctc=74.840, loss_att=82.781, acc=0.620, loss=80.399, backward_time=0.415, grad_norm=31.025, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.096, optim0_lr0=4.353e-04, train_time=1.413 +[gpub052:0/16] 2024-01-23 13:23:42,848 (trainer:737) INFO: 6epoch:train:4201-4300batch: iter_time=8.282e-05, forward_time=0.338, loss_ctc=68.977, loss_att=62.471, acc=0.639, loss=64.423, backward_time=0.423, grad_norm=30.380, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.095, optim0_lr0=4.351e-04, train_time=1.389 +[gpub052:0/16] 2024-01-23 13:25:57,816 (trainer:737) INFO: 6epoch:train:4301-4400batch: iter_time=3.827e-04, forward_time=0.395, loss_ctc=65.915, loss_att=64.105, acc=0.648, loss=64.648, backward_time=0.426, grad_norm=27.683, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.105, optim0_lr0=4.348e-04, train_time=1.350 +[gpub052:0/16] 2024-01-23 13:28:20,293 (trainer:737) INFO: 6epoch:train:4401-4500batch: iter_time=7.953e-05, forward_time=0.302, loss_ctc=80.371, loss_att=82.981, acc=0.605, loss=82.198, backward_time=0.410, grad_norm=33.372, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.094, optim0_lr0=4.345e-04, train_time=1.424 +[gpub052:0/16] 2024-01-23 13:30:43,499 (trainer:737) INFO: 6epoch:train:4501-4600batch: iter_time=3.965e-04, forward_time=0.378, loss_ctc=74.366, loss_att=72.344, acc=0.635, loss=72.951, backward_time=0.437, grad_norm=30.150, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.099, optim0_lr0=4.342e-04, train_time=1.430 +[gpub052:0/16] 2024-01-23 13:32:42,321 (trainer:737) INFO: 6epoch:train:4601-4700batch: iter_time=8.132e-05, 
forward_time=0.331, loss_ctc=75.614, loss_att=69.615, acc=0.640, loss=71.415, backward_time=0.449, grad_norm=31.087, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.098, optim0_lr0=4.340e-04, train_time=1.188 +[gpub052:0/16] 2024-01-23 13:35:16,304 (trainer:737) INFO: 6epoch:train:4701-4800batch: iter_time=1.998e-04, forward_time=0.340, loss_ctc=70.860, loss_att=84.396, acc=0.621, loss=80.335, backward_time=0.437, grad_norm=32.435, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.099, optim0_lr0=4.337e-04, train_time=1.539 +[gpub052:0/16] 2024-01-23 13:37:35,334 (trainer:737) INFO: 6epoch:train:4801-4900batch: iter_time=3.310e-04, forward_time=0.363, loss_ctc=67.819, loss_att=70.520, acc=0.630, loss=69.710, backward_time=0.444, grad_norm=31.823, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.102, optim0_lr0=4.334e-04, train_time=1.391 +[gpub052:0/16] 2024-01-23 13:39:50,078 (trainer:737) INFO: 6epoch:train:4901-5000batch: iter_time=8.121e-05, forward_time=0.292, loss_ctc=70.369, loss_att=67.566, acc=0.643, loss=68.407, backward_time=0.411, grad_norm=32.744, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.094, optim0_lr0=4.331e-04, train_time=1.346 +[gpub052:0/16] 2024-01-23 13:40:10,311 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub052:0/16] 2024-01-23 13:40:30,083 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 13:40:34,005 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 13:40:34,005 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub052:0/16] 2024-01-23 13:40:34,008 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 13:46:23,764 (trainer:737) INFO: 6epoch:train:5001-5100batch: iter_time=2.645, forward_time=0.398, loss_ctc=82.972, loss_att=74.892, acc=0.610, loss=77.316, backward_time=0.441, grad_norm=34.043, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.100, optim0_lr0=4.329e-04, train_time=3.938 +[gpub052:0/16] 2024-01-23 13:48:51,209 (trainer:737) INFO: 6epoch:train:5101-5200batch: iter_time=3.416e-04, forward_time=0.383, loss_ctc=72.529, loss_att=78.861, acc=0.631, loss=76.961, backward_time=0.436, grad_norm=31.584, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.099, optim0_lr0=4.326e-04, train_time=1.475 +[gpub052:0/16] 2024-01-23 13:51:06,930 (trainer:737) INFO: 6epoch:train:5201-5300batch: iter_time=7.768e-05, forward_time=0.319, loss_ctc=75.796, loss_att=78.391, acc=0.642, loss=77.612, backward_time=0.417, grad_norm=33.179, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.095, optim0_lr0=4.323e-04, train_time=1.357 +[gpub052:0/16] 2024-01-23 13:53:41,121 (trainer:737) INFO: 
6epoch:train:5301-5400batch: iter_time=3.681e-04, forward_time=0.388, loss_ctc=70.462, loss_att=75.914, acc=0.643, loss=74.278, backward_time=0.451, grad_norm=31.088, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.104, optim0_lr0=4.321e-04, train_time=1.542 +[gpub052:0/16] 2024-01-23 13:55:43,879 (trainer:737) INFO: 6epoch:train:5401-5500batch: iter_time=8.047e-05, forward_time=0.334, loss_ctc=66.354, loss_att=76.909, acc=0.609, loss=73.742, backward_time=0.424, grad_norm=32.603, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.096, optim0_lr0=4.318e-04, train_time=1.227 +[gpub052:0/16] 2024-01-23 13:58:23,582 (trainer:737) INFO: 6epoch:train:5501-5600batch: iter_time=1.492e-04, forward_time=0.347, loss_ctc=67.702, loss_att=63.012, acc=0.651, loss=64.419, backward_time=0.414, grad_norm=27.991, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.095, optim0_lr0=4.315e-04, train_time=1.597 +[gpub052:0/16] 2024-01-23 14:01:02,216 (trainer:737) INFO: 6epoch:train:5601-5700batch: iter_time=0.307, forward_time=0.398, loss_ctc=77.723, loss_att=74.103, acc=0.647, loss=75.189, backward_time=0.445, grad_norm=32.395, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.102, optim0_lr0=4.313e-04, train_time=1.586 +[gpub052:0/16] 2024-01-23 14:03:40,069 (trainer:737) INFO: 6epoch:train:5701-5800batch: iter_time=8.314e-05, forward_time=0.293, loss_ctc=79.175, loss_att=87.336, acc=0.609, loss=84.888, backward_time=0.405, grad_norm=34.306, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.094, optim0_lr0=4.310e-04, train_time=1.578 +[gpub052:0/16] 2024-01-23 14:05:49,894 (trainer:737) INFO: 6epoch:train:5801-5900batch: iter_time=2.953e-04, forward_time=0.433, loss_ctc=67.993, loss_att=70.604, acc=0.660, loss=69.821, backward_time=0.433, grad_norm=28.671, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.101, optim0_lr0=4.307e-04, train_time=1.298 +[gpub052:0/16] 2024-01-23 14:08:36,890 (trainer:737) INFO: 6epoch:train:5901-6000batch: iter_time=6.651e-04, 
forward_time=0.373, loss_ctc=77.626, loss_att=89.365, acc=0.614, loss=85.843, backward_time=0.438, grad_norm=34.335, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=4.305e-04, train_time=1.669 +[gpub052:0/16] 2024-01-23 14:11:00,048 (trainer:737) INFO: 6epoch:train:6001-6100batch: iter_time=7.728e-05, forward_time=0.291, loss_ctc=65.197, loss_att=66.571, acc=0.646, loss=66.159, backward_time=0.403, grad_norm=31.000, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.094, optim0_lr0=4.302e-04, train_time=1.432 +[gpub052:0/16] 2024-01-23 14:13:09,352 (trainer:737) INFO: 6epoch:train:6101-6200batch: iter_time=4.751e-04, forward_time=0.422, loss_ctc=64.019, loss_att=66.696, acc=0.667, loss=65.893, backward_time=0.429, grad_norm=27.994, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.104, optim0_lr0=4.299e-04, train_time=1.292 +[gpub052:0/16] 2024-01-23 14:14:58,466 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub052:0/16] 2024-01-23 14:15:18,114 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub052:0/16] 2024-01-23 14:15:21,791 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub052:0/16] 2024-01-23 14:15:21,791 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub052:0/16] 2024-01-23 14:15:21,794 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub052:0/16] 2024-01-23 14:21:16,621 (trainer:737) INFO: 
6epoch:train:6201-6300batch: iter_time=3.309, forward_time=0.379, loss_ctc=83.622, loss_att=72.710, acc=0.626, loss=75.984, backward_time=0.425, grad_norm=34.758, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.098, optim0_lr0=4.297e-04, train_time=4.873 +[gpub052:0/16] 2024-01-23 14:23:42,764 (trainer:737) INFO: 6epoch:train:6301-6400batch: iter_time=7.469e-05, forward_time=0.424, loss_ctc=70.968, loss_att=67.491, acc=0.649, loss=68.534, backward_time=0.428, grad_norm=29.232, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.097, optim0_lr0=4.294e-04, train_time=1.461 +[gpub052:0/16] 2024-01-23 14:25:53,181 (trainer:737) INFO: 6epoch:train:6401-6500batch: iter_time=8.008e-05, forward_time=0.304, loss_ctc=70.543, loss_att=78.490, acc=0.642, loss=76.106, backward_time=0.413, grad_norm=29.811, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.094, optim0_lr0=4.291e-04, train_time=1.303 +[gpub052:0/16] 2024-01-23 14:28:09,859 (trainer:737) INFO: 6epoch:train:6501-6600batch: iter_time=8.593e-05, forward_time=0.354, loss_ctc=78.352, loss_att=78.608, acc=0.626, loss=78.531, backward_time=0.464, grad_norm=34.285, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.102, optim0_lr0=4.289e-04, train_time=1.368 +[gpub052:0/16] 2024-01-23 14:30:55,393 (trainer:737) INFO: 6epoch:train:6601-6700batch: iter_time=3.821e-04, forward_time=0.353, loss_ctc=71.847, loss_att=82.209, acc=0.628, loss=79.101, backward_time=0.446, grad_norm=30.166, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.104, optim0_lr0=4.286e-04, train_time=1.655 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. 
+slurmstepd: error: *** STEP 2878933.0 ON gpub052 CANCELLED AT 2024-01-23T14:32:56 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.18.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.18.log new file mode 100644 index 0000000000000000000000000000000000000000..f7cb3139711a195edc29081eda038455188bf749 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.18.log @@ -0,0 +1,1991 @@ +# Running on gpub024.delta.ncsa.illinois.edu +# Started at Sat Jan 20 18:27:57 CST 2024 +# SLURMD_NODENAME=gpub024 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2877332 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x8)' +# SLURM_JOB_END_TIME=1705969637 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2877332 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[024,026-027,029,031,038,040,042]' +# SLURM_JOB_NUM_NODES=8 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1705796837 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=8 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[024,026-027,029,031,038,040,042]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login01.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x8)' +# SLURM_TASK_PID=370659 +# SLURM_TOPOLOGY_ADDR=ss00.ss10.gpub024 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# 
SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none 
--g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config 
conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessi_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +ng_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_87808da5-0a77-452f-b2ef-79e88058766b +[gpub024:0/32] 2024-01-20 18:37:38,742 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub024:0/32] 2024-01-20 18:37:53,683 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 32 nodes. 
+[gpub024:0/32] 2024-01-20 18:37:53,755 (s2t:464) INFO: Vocabulary size: 50002 +[gpub024:0/32] 2024-01-20 18:38:03,970 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub024:0/32] 2024-01-20 18:38:03,976 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), 
eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, 
elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + 
(conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): 
Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, 
inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + 
(norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, 
out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): 
PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + 
(linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub024:0/32] 2024-01-20 18:38:03,976 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub024:0/32] 2024-01-20 18:38:03,976 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub024:0/32] 2024-01-20 18:38:03,978 (abs_task:1245) INFO: Saving the configuration in 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub024:0/32] 2024-01-20 18:38:09,356 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-20 18:38:10,279 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-20 18:38:10,279 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub024:0/32] 2024-01-20 18:38:10,280 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +gpub024:370832:370832 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:370832:370832 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:370832:370832 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub024:0/32] 2024-01-20 18:38:25,974 (trainer:284) INFO: 1/45epoch started +[gpub024:0/32] 2024-01-20 18:38:26,023 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub024:0/32] 2024-01-20 18:38:44,427 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-20 18:38:48,114 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-20 18:38:48,114 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub024:0/32] 2024-01-20 18:38:48,118 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub031:1194500:1194500 [3] NCCL INFO cudaDriverVersion 12020 +gpub031:1194500:1194500 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1194500:1194500 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1194500:1194561 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1194500:1194561 [3] NCCL INFO Using network IB +gpub031:1194500:1194561 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:1194500:1194561 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub031:1194500:1194561 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub031:1194500:1194561 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub031:1194500:1194561 [3] NCCL INFO Connected all rings +gpub031:1194500:1194561 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub031:1194500:1194561 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC 
+gpub031:1194500:1194561 [3] NCCL INFO Connected all trees +gpub031:1194500:1194561 [3] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub031:1194500:1194561 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1194500:1194561 [3] NCCL INFO comm 0x144ee660 rank 19 nranks 32 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1194498:1194498 [1] NCCL INFO cudaDriverVersion 12020 +gpub031:1194498:1194498 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1194498:1194498 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1194498:1194562 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1194498:1194562 [1] NCCL INFO Using network IB +gpub031:1194498:1194562 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:1194498:1194562 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub031:1194498:1194562 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub031:1194498:1194562 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub031:1194498:1194562 [1] NCCL INFO Connected all rings +gpub031:1194498:1194562 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub031:1194498:1194562 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub031:1194498:1194562 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub031:1194498:1194562 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub031:1194498:1194562 [1] NCCL INFO Connected all trees +gpub031:1194498:1194562 [1] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub031:1194498:1194562 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1194498:1194562 [1] NCCL INFO comm 0x17154ae0 rank 17 nranks 32 cudaDev 1 busId 46000 - Init COMPLETE +gpub031:1194497:1194497 [0] NCCL INFO cudaDriverVersion 12020 +gpub031:1194497:1194497 [0] NCCL INFO 
Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1194497:1194497 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1194497:1194564 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1194497:1194564 [0] NCCL INFO Using network IB +gpub031:1194497:1194564 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub031:1194497:1194564 [0] NCCL INFO Trees [0] 17/24/-1->16->0 [1] 17/-1/-1->16->20 +gpub031:1194497:1194564 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub031:1194497:1194564 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub031:1194497:1194564 [0] NCCL INFO Connected all rings +gpub031:1194497:1194564 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Channel 00/0 : 0[7000] -> 16[7000] [receive] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Channel 00/0 : 16[7000] -> 0[7000] [send] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub031:1194497:1194564 [0] NCCL INFO Connected all trees +gpub031:1194497:1194564 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub031:1194497:1194564 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1194497:1194564 [0] NCCL INFO comm 0x146eea40 rank 16 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1194499:1194499 [2] NCCL INFO cudaDriverVersion 12020 +gpub031:1194499:1194499 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> 
+gpub031:1194499:1194499 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1194499:1194563 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1194499:1194563 [2] NCCL INFO Using network IB +gpub031:1194499:1194563 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:1194499:1194563 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub031:1194499:1194563 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub031:1194499:1194563 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub031:1194499:1194563 [2] NCCL INFO Connected all rings +gpub031:1194499:1194563 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub031:1194499:1194563 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub031:1194499:1194563 [2] NCCL INFO Connected all trees +gpub031:1194499:1194563 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub031:1194499:1194563 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1194499:1194563 [2] NCCL INFO comm 0xe55c030 rank 18 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +gpub026:3211453:3211453 [3] NCCL INFO cudaDriverVersion 12020 +gpub026:3211453:3211453 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:3211453:3211453 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:3211453:3211510 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:3211453:3211510 [3] NCCL INFO Using network IB +gpub026:3211453:3211510 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:3211453:3211510 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub026:3211453:3211510 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub026:3211453:3211510 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub026:3211453:3211510 [3] 
NCCL INFO Connected all rings +gpub026:3211453:3211510 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub026:3211453:3211510 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub026:3211453:3211510 [3] NCCL INFO Connected all trees +gpub026:3211453:3211510 [3] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub026:3211453:3211510 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:3211453:3211510 [3] NCCL INFO comm 0x1093d4c0 rank 7 nranks 32 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:3211452:3211452 [2] NCCL INFO cudaDriverVersion 12020 +gpub026:3211452:3211452 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:3211452:3211452 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:3211452:3211513 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:3211452:3211513 [2] NCCL INFO Using network IB +gpub026:3211452:3211513 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:3211452:3211513 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub026:3211452:3211513 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub026:3211452:3211513 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub026:3211452:3211513 [2] NCCL INFO Connected all rings +gpub026:3211452:3211513 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub026:3211452:3211513 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub026:3211452:3211513 [2] NCCL INFO Connected all trees +gpub026:3211452:3211513 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub026:3211452:3211513 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:3211452:3211513 [2] NCCL INFO comm 0x14994570 rank 6 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +gpub026:3211451:3211451 [1] NCCL INFO cudaDriverVersion 12020 +gpub026:3211451:3211451 [1] NCCL INFO 
Bootstrap : Using eth1:172.28.23.126<0> +gpub026:3211451:3211451 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:3211451:3211509 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:3211451:3211509 [1] NCCL INFO Using network IB +gpub026:3211451:3211509 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub026:3211451:3211509 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub026:3211451:3211509 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub026:3211451:3211509 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub026:3211451:3211509 [1] NCCL INFO Connected all rings +gpub026:3211451:3211509 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub026:3211451:3211509 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub026:3211451:3211509 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub026:3211451:3211509 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub026:3211451:3211509 [1] NCCL INFO Connected all trees +gpub026:3211451:3211509 [1] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub026:3211451:3211509 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:3211451:3211509 [1] NCCL INFO comm 0x106512e0 rank 5 nranks 32 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:3211450:3211450 [0] NCCL INFO cudaDriverVersion 12020 +gpub026:3211450:3211450 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:3211450:3211450 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:3211450:3211508 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:3211450:3211508 [0] NCCL INFO Using network IB +gpub026:3211450:3211508 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:3211450:3211508 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 
5/0/-1->4->12 +gpub026:3211450:3211508 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub026:3211450:3211508 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub026:3211450:3211508 [0] NCCL INFO Connected all rings +gpub026:3211450:3211508 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub026:3211450:3211508 [0] NCCL INFO Connected all trees +gpub026:3211450:3211508 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub026:3211450:3211508 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:3211450:3211508 [0] NCCL INFO comm 0xff71720 rank 4 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub040:2133145:2133145 [2] NCCL INFO cudaDriverVersion 12020 +gpub040:2133145:2133145 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:2133145:2133145 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:2133145:2133222 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:2133145:2133222 [2] NCCL INFO Using network IB +gpub040:2133145:2133222 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub040:2133145:2133222 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub040:2133145:2133222 [2] NCCL INFO Channel 00/0 : 
26[85000] -> 27[c7000] via P2P/IPC +gpub040:2133145:2133222 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub040:2133145:2133222 [2] NCCL INFO Connected all rings +gpub040:2133145:2133222 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub040:2133145:2133222 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub040:2133145:2133222 [2] NCCL INFO Connected all trees +gpub040:2133145:2133222 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub040:2133145:2133222 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2133145:2133222 [2] NCCL INFO comm 0x104cb3f0 rank 26 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +gpub029:521670:521670 [1] NCCL INFO cudaDriverVersion 12020 +gpub029:521670:521670 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.129<0> +gpub029:521670:521670 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub029:521670:521730 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.129<0> +gpub029:521670:521730 [1] NCCL INFO Using network IB +gpub029:521670:521730 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub029:521670:521730 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub029:521670:521730 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub029:521670:521730 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub029:521670:521730 [1] NCCL INFO Connected all rings +gpub029:521670:521730 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub029:521670:521730 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub029:521670:521730 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub029:521670:521730 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub029:521670:521730 [1] NCCL INFO Connected all trees +gpub029:521670:521730 [1] NCCL INFO threadThresholds 8/8/64 | 
256/8/64 | 512 | 512 +gpub029:521670:521730 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub029:521670:521730 [1] NCCL INFO comm 0x1115f990 rank 13 nranks 32 cudaDev 1 busId 46000 - Init COMPLETE +gpub029:521671:521671 [2] NCCL INFO cudaDriverVersion 12020 +gpub029:521671:521671 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.129<0> +gpub029:521671:521671 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub029:521671:521732 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.129<0> +gpub029:521671:521732 [2] NCCL INFO Using network IB +gpub029:521671:521732 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub029:521671:521732 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub029:521671:521732 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub029:521671:521732 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub029:521671:521732 [2] NCCL INFO Connected all rings +gpub029:521671:521732 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub029:521671:521732 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub029:521671:521732 [2] NCCL INFO Connected all trees +gpub029:521671:521732 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub029:521671:521732 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub029:521671:521732 [2] NCCL INFO comm 0xf1ac6a0 rank 14 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +gpub042:3340684:3340684 [1] NCCL INFO cudaDriverVersion 12020 +gpub042:3340684:3340684 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:3340684:3340684 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub042:3340684:3340737 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.142<0> +gpub042:3340684:3340737 [1] NCCL INFO Using network IB +gpub042:3340684:3340737 [1] NCCL INFO 
Setting affinity for GPU 1 to ffff,00000000 +gpub042:3340684:3340737 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/-1/-1->29->28 +gpub042:3340684:3340737 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub042:3340684:3340737 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub042:3340684:3340737 [1] NCCL INFO Connected all rings +gpub042:3340684:3340737 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub042:3340684:3340737 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub042:3340684:3340737 [1] NCCL INFO Connected all trees +gpub042:3340684:3340737 [1] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub042:3340684:3340737 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:3340684:3340737 [1] NCCL INFO comm 0xf7b4e00 rank 29 nranks 32 cudaDev 1 busId 46000 - Init COMPLETE +gpub029:521672:521672 [3] NCCL INFO cudaDriverVersion 12020 +gpub029:521672:521672 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.129<0> +gpub029:521672:521672 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub029:521672:521731 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.129<0> +gpub029:521672:521731 [3] NCCL INFO Using network IB +gpub029:521672:521731 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub029:521672:521731 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub029:521672:521731 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub029:521672:521731 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub029:521672:521731 [3] NCCL INFO Connected all rings +gpub029:521672:521731 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub029:521672:521731 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub029:521672:521731 [3] NCCL INFO Connected all trees +gpub029:521672:521731 [3] NCCL INFO threadThresholds 8/8/64 | 
256/8/64 | 512 | 512 +gpub029:521672:521731 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub029:521672:521731 [3] NCCL INFO comm 0x1c3339f0 rank 15 nranks 32 cudaDev 3 busId c7000 - Init COMPLETE +gpub038:1156124:1156124 [0] NCCL INFO cudaDriverVersion 12020 +gpub038:1156124:1156124 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.138<0> +gpub038:1156124:1156124 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub038:1156124:1156219 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.138<0> +gpub038:1156124:1156219 [0] NCCL INFO Using network IB +gpub038:1156124:1156219 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub038:1156124:1156219 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub038:1156124:1156219 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub038:1156124:1156219 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub038:1156124:1156219 [0] NCCL INFO Connected all rings +gpub038:1156124:1156219 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub038:1156124:1156219 [0] NCCL INFO Connected all trees +gpub038:1156124:1156219 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 
+gpub038:1156124:1156219 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub038:1156124:1156219 [0] NCCL INFO comm 0x14d1c920 rank 20 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub038:1156126:1156126 [2] NCCL INFO cudaDriverVersion 12020 +gpub038:1156126:1156126 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.138<0> +gpub038:1156126:1156126 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub038:1156126:1156220 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.138<0> +gpub038:1156126:1156220 [2] NCCL INFO Using network IB +gpub038:1156126:1156220 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub038:1156126:1156220 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub038:1156126:1156220 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub038:1156126:1156220 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub038:1156126:1156220 [2] NCCL INFO Connected all rings +gpub038:1156126:1156220 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub038:1156126:1156220 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub038:1156126:1156220 [2] NCCL INFO Connected all trees +gpub038:1156126:1156220 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub038:1156126:1156220 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub038:1156126:1156220 [2] NCCL INFO comm 0x17362910 rank 22 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +gpub042:3340686:3340686 [3] NCCL INFO cudaDriverVersion 12020 +gpub042:3340686:3340686 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:3340686:3340686 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub042:3340686:3340739 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.142<0> +gpub042:3340686:3340739 [3] NCCL INFO Using network IB +gpub042:3340686:3340739 
[3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub042:3340686:3340739 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub042:3340686:3340739 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 0[7000] [send] via NET/IB/0 +gpub042:3340686:3340739 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 0[7000] [send] via NET/IB/0 +gpub042:3340686:3340739 [3] NCCL INFO Connected all rings +gpub042:3340686:3340739 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub042:3340686:3340739 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub042:3340686:3340739 [3] NCCL INFO Connected all trees +gpub042:3340686:3340739 [3] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub042:3340686:3340739 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:3340686:3340739 [3] NCCL INFO comm 0x163cf940 rank 31 nranks 32 cudaDev 3 busId c7000 - Init COMPLETE +gpub040:2133144:2133144 [1] NCCL INFO cudaDriverVersion 12020 +gpub040:2133144:2133144 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:2133144:2133144 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:2133144:2133218 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:2133144:2133218 [1] NCCL INFO Using network IB +gpub040:2133144:2133218 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub040:2133144:2133218 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub040:2133144:2133218 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub040:2133144:2133218 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub040:2133144:2133218 [1] NCCL INFO Connected all rings +gpub040:2133144:2133218 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub040:2133144:2133218 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub040:2133144:2133218 [1] NCCL INFO Channel 00/0 : 25[46000] 
-> 24[7000] via P2P/IPC +gpub040:2133144:2133218 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub040:2133144:2133218 [1] NCCL INFO Connected all trees +gpub040:2133144:2133218 [1] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub040:2133144:2133218 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2133144:2133218 [1] NCCL INFO comm 0x103eb6e0 rank 25 nranks 32 cudaDev 1 busId 46000 - Init COMPLETE +gpub038:1156125:1156125 [1] NCCL INFO cudaDriverVersion 12020 +gpub038:1156125:1156125 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.138<0> +gpub038:1156125:1156125 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub038:1156125:1156217 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.138<0> +gpub038:1156125:1156217 [1] NCCL INFO Using network IB +gpub038:1156125:1156217 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub038:1156125:1156217 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub038:1156125:1156217 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub038:1156125:1156217 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub038:1156125:1156217 [1] NCCL INFO Connected all rings +gpub038:1156125:1156217 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub038:1156125:1156217 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub038:1156125:1156217 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub038:1156125:1156217 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub038:1156125:1156217 [1] NCCL INFO Connected all trees +gpub038:1156125:1156217 [1] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub038:1156125:1156217 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub038:1156125:1156217 [1] NCCL INFO comm 0x15ca03c0 rank 21 nranks 32 cudaDev 1 busId 46000 - 
Init COMPLETE +gpub027:248345:248345 [2] NCCL INFO cudaDriverVersion 12020 +gpub027:248345:248345 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:248345:248345 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:248345:248512 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:248345:248512 [2] NCCL INFO Using network IB +gpub027:248345:248512 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub027:248345:248512 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub027:248345:248512 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub027:248345:248512 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub027:248345:248512 [2] NCCL INFO Connected all rings +gpub027:248345:248512 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub027:248345:248512 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub027:248345:248512 [2] NCCL INFO Connected all trees +gpub027:248345:248512 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub027:248345:248512 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:248345:248512 [2] NCCL INFO comm 0xef2d9a0 rank 10 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +gpub040:2133146:2133146 [3] NCCL INFO cudaDriverVersion 12020 +gpub040:2133146:2133146 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:2133146:2133146 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:2133146:2133219 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:2133146:2133219 [3] NCCL INFO Using network IB +gpub040:2133146:2133219 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub040:2133146:2133219 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub040:2133146:2133219 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 
+gpub040:2133146:2133219 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub040:2133146:2133219 [3] NCCL INFO Connected all rings +gpub040:2133146:2133219 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub040:2133146:2133219 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub040:2133146:2133219 [3] NCCL INFO Connected all trees +gpub040:2133146:2133219 [3] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub040:2133146:2133219 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2133146:2133219 [3] NCCL INFO comm 0x14c4db60 rank 27 nranks 32 cudaDev 3 busId c7000 - Init COMPLETE +gpub027:248346:248346 [3] NCCL INFO cudaDriverVersion 12020 +gpub027:248346:248346 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:248346:248346 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:248346:248511 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:248346:248511 [3] NCCL INFO Using network IB +gpub027:248346:248511 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub027:248346:248511 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub027:248346:248511 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub027:248346:248511 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub027:248346:248511 [3] NCCL INFO Connected all rings +gpub027:248346:248511 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub027:248346:248511 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub027:248346:248511 [3] NCCL INFO Connected all trees +gpub027:248346:248511 [3] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub027:248346:248511 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:248346:248511 [3] NCCL INFO comm 0x1a3091a0 rank 11 nranks 32 cudaDev 3 busId c7000 - 
Init COMPLETE +gpub029:521669:521669 [0] NCCL INFO cudaDriverVersion 12020 +gpub029:521669:521669 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.129<0> +gpub029:521669:521669 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub029:521669:521734 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.129<0> +gpub029:521669:521734 [0] NCCL INFO Using network IB +gpub029:521669:521734 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub029:521669:521734 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub029:521669:521734 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub029:521669:521734 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub029:521669:521734 [0] NCCL INFO Connected all rings +gpub029:521669:521734 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub029:521669:521734 [0] NCCL INFO Connected all trees +gpub029:521669:521734 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub029:521669:521734 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub029:521669:521734 [0] NCCL INFO comm 0x11d80350 rank 12 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:248344:248344 [1] NCCL INFO cudaDriverVersion 12020 
+gpub027:248344:248344 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:248344:248344 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:248344:248509 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:248344:248509 [1] NCCL INFO Using network IB +gpub027:248344:248509 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub027:248344:248509 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub027:248344:248509 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub027:248344:248509 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub027:248344:248509 [1] NCCL INFO Connected all rings +gpub027:248344:248509 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub027:248344:248509 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub027:248344:248509 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub027:248344:248509 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub027:248344:248509 [1] NCCL INFO Connected all trees +gpub027:248344:248509 [1] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub027:248344:248509 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:248344:248509 [1] NCCL INFO comm 0x12ad62e0 rank 9 nranks 32 cudaDev 1 busId 46000 - Init COMPLETE +gpub040:2133143:2133143 [0] NCCL INFO cudaDriverVersion 12020 +gpub040:2133143:2133143 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:2133143:2133143 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:2133143:2133224 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:2133143:2133224 [0] NCCL INFO Using network IB +gpub040:2133143:2133224 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub040:2133143:2133224 [0] NCCL INFO Trees [0] 
25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub040:2133143:2133224 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub040:2133143:2133224 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub040:2133143:2133224 [0] NCCL INFO Connected all rings +gpub040:2133143:2133224 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub040:2133143:2133224 [0] NCCL INFO Connected all trees +gpub040:2133143:2133224 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub040:2133143:2133224 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2133143:2133224 [0] NCCL INFO comm 0x17e0fdc0 rank 24 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:248343:248343 [0] NCCL INFO cudaDriverVersion 12020 +gpub027:248343:248343 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:248343:248343 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:248343:248510 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:248343:248510 [0] NCCL INFO Using network IB +gpub027:248343:248510 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub027:248343:248510 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub027:248343:248510 
[0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub027:248343:248510 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub027:248343:248510 [0] NCCL INFO Connected all rings +gpub027:248343:248510 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub027:248343:248510 [0] NCCL INFO Connected all trees +gpub027:248343:248510 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub027:248343:248510 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:248343:248510 [0] NCCL INFO comm 0x1b7294c0 rank 8 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub038:1156127:1156127 [3] NCCL INFO cudaDriverVersion 12020 +gpub038:1156127:1156127 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.138<0> +gpub038:1156127:1156127 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub038:1156127:1156218 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.138<0> +gpub038:1156127:1156218 [3] NCCL INFO Using network IB +gpub038:1156127:1156218 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub038:1156127:1156218 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub038:1156127:1156218 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub038:1156127:1156218 [3] 
NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub038:1156127:1156218 [3] NCCL INFO Connected all rings +gpub038:1156127:1156218 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub038:1156127:1156218 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub038:1156127:1156218 [3] NCCL INFO Connected all trees +gpub038:1156127:1156218 [3] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub038:1156127:1156218 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub038:1156127:1156218 [3] NCCL INFO comm 0x1a2cf7e0 rank 23 nranks 32 cudaDev 3 busId c7000 - Init COMPLETE +gpub042:3340683:3340683 [0] NCCL INFO cudaDriverVersion 12020 +gpub042:3340683:3340683 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:3340683:3340683 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub042:3340683:3340740 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.142<0> +gpub042:3340683:3340740 [0] NCCL INFO Using network IB +gpub042:3340683:3340740 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub042:3340683:3340740 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->-1 +gpub042:3340683:3340740 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub042:3340683:3340740 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub042:3340683:3340740 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub042:3340683:3340740 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub042:3340683:3340740 [0] NCCL INFO Connected all rings +gpub042:3340683:3340740 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub042:3340683:3340740 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub042:3340683:3340740 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub042:3340683:3340740 [0] 
NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub042:3340683:3340740 [0] NCCL INFO Connected all trees +gpub042:3340683:3340740 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub042:3340683:3340740 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:3340683:3340740 [0] NCCL INFO comm 0x1630c5f0 rank 28 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub042:3340685:3340685 [2] NCCL INFO cudaDriverVersion 12020 +gpub042:3340685:3340685 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:3340685:3340685 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub042:3340685:3340738 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.142<0> +gpub042:3340685:3340738 [2] NCCL INFO Using network IB +gpub042:3340685:3340738 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub042:3340685:3340738 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub042:3340685:3340738 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub042:3340685:3340738 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub042:3340685:3340738 [2] NCCL INFO Connected all rings +gpub042:3340685:3340738 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub042:3340685:3340738 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub042:3340685:3340738 [2] NCCL INFO Connected all trees +gpub042:3340685:3340738 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub042:3340685:3340738 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:3340685:3340738 [2] NCCL INFO comm 0x17ac17b0 rank 30 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:370833:370833 [1] NCCL INFO cudaDriverVersion 12020 +gpub024:370833:370833 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:370833:370833 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using 
internal implementation +gpub024:370833:370886 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:370833:370886 [1] NCCL INFO Using network IB +gpub024:370833:370886 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub024:370833:370886 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub024:370833:370886 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub024:370833:370886 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub024:370833:370886 [1] NCCL INFO Connected all rings +gpub024:370833:370886 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub024:370833:370886 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub024:370833:370886 [1] NCCL INFO Connected all trees +gpub024:370833:370886 [1] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub024:370833:370886 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:370833:370886 [1] NCCL INFO comm 0x14718a70 rank 1 nranks 32 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:370832:370887 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:370832:370887 [0] NCCL INFO Using network IB +gpub024:370832:370887 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub024:370832:370887 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub024:370832:370887 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub024:370832:370887 [0] NCCL INFO Trees [0] 1/16/-1->0->-1 [1] 1/-1/-1->0->4 +gpub024:370832:370887 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub024:370832:370887 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub024:370832:370887 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub024:370832:370887 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub024:370832:370887 [0] NCCL INFO 
Connected all rings +gpub024:370832:370887 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub024:370832:370887 [0] NCCL INFO Channel 00/0 : 16[7000] -> 0[7000] [receive] via NET/IB/0 +gpub024:370832:370887 [0] NCCL INFO Channel 00/0 : 0[7000] -> 16[7000] [send] via NET/IB/0 +gpub024:370832:370887 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub024:370832:370887 [0] NCCL INFO Connected all trees +gpub024:370832:370887 [0] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub024:370832:370887 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:370832:370887 [0] NCCL INFO comm 0x1b1c4d20 rank 0 nranks 32 cudaDev 0 busId 7000 - Init COMPLETE +gpub024:370835:370835 [3] NCCL INFO cudaDriverVersion 12020 +gpub024:370835:370835 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:370835:370835 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:370835:370888 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:370835:370888 [3] NCCL INFO Using network IB +gpub024:370835:370888 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub024:370835:370888 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub024:370835:370888 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub024:370835:370888 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub024:370835:370888 [3] NCCL INFO Connected all rings +gpub024:370835:370888 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub024:370835:370888 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub024:370835:370888 [3] NCCL INFO Connected all trees +gpub024:370835:370888 [3] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub024:370835:370888 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:370835:370888 [3] NCCL INFO comm 0x17ae9650 rank 3 
nranks 32 cudaDev 3 busId c7000 - Init COMPLETE +gpub024:370834:370834 [2] NCCL INFO cudaDriverVersion 12020 +gpub024:370834:370834 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:370834:370834 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:370834:370885 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:370834:370885 [2] NCCL INFO Using network IB +gpub024:370834:370885 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub024:370834:370885 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub024:370834:370885 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub024:370834:370885 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub024:370834:370885 [2] NCCL INFO Connected all rings +gpub024:370834:370885 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub024:370834:370885 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub024:370834:370885 [2] NCCL INFO Connected all trees +gpub024:370834:370885 [2] NCCL INFO threadThresholds 8/8/64 | 256/8/64 | 512 | 512 +gpub024:370834:370885 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:370834:370885 [2] NCCL INFO comm 0x15a9a000 rank 2 nranks 32 cudaDev 2 busId 85000 - Init COMPLETE +[gpub024:0/32] 2024-01-20 18:55:18,982 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub024:0/32] 2024-01-20 19:10:24,490 (trainer:737) INFO: 1epoch:train:1-100batch: iter_time=8.017, forward_time=5.067, loss_ctc=7.108e+03, loss_att=391.197, acc=1.527e-05, loss=2.406e+03, backward_time=1.579, grad_norm=1.413e+04, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.532, optim0_lr0=8.583e-08, train_time=19.173 +[gpub024:0/32] 2024-01-20 19:22:54,102 (trainer:737) INFO: 1epoch:train:101-200batch: iter_time=0.007, forward_time=3.200, loss_ctc=3.684e+03, loss_att=398.294, acc=1.718e-05, loss=1.384e+03, backward_time=1.665, grad_norm=1.747e+04, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.303, optim0_lr0=2.525e-07, train_time=7.505 +[gpub024:0/32] 2024-01-20 19:31:46,675 (trainer:737) INFO: 1epoch:train:201-300batch: iter_time=0.005, forward_time=1.924, loss_ctc=603.674, loss_att=431.352, acc=1.173e-05, loss=483.049, backward_time=0.938, grad_norm=2.342e+03, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.182, optim0_lr0=4.192e-07, train_time=5.328 +[gpub024:0/32] 2024-01-20 19:39:38,342 (trainer:737) INFO: 1epoch:train:301-400batch: iter_time=0.003, forward_time=1.344, loss_ctc=381.188, loss_att=373.060, acc=3.251e-05, loss=375.498, backward_time=0.993, grad_norm=252.988, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.179, optim0_lr0=5.858e-07, train_time=4.710 +[gpub024:0/32] 2024-01-20 19:45:26,480 (trainer:737) INFO: 1epoch:train:401-500batch: iter_time=0.001, forward_time=0.885, loss_ctc=392.954, loss_att=399.022, acc=2.600e-05, loss=397.201, backward_time=0.506, grad_norm=250.900, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.125, optim0_lr0=7.525e-07, train_time=3.485 +[gpub024:0/32] 2024-01-20 19:50:54,887 (trainer:737) INFO: 1epoch:train:501-600batch: iter_time=0.002, forward_time=0.709, loss_ctc=395.354, loss_att=398.357, acc=4.765e-05, loss=397.456, backward_time=0.481, grad_norm=228.791, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.097, optim0_lr0=9.192e-07, train_time=3.285 +[gpub024:0/32] 
2024-01-20 19:55:50,223 (trainer:737) INFO: 1epoch:train:601-700batch: iter_time=6.128e-04, forward_time=0.391, loss_ctc=364.242, loss_att=393.167, acc=8.524e-05, loss=384.490, backward_time=0.439, grad_norm=201.715, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.075, optim0_lr0=1.086e-06, train_time=2.953 +[gpub024:0/32] 2024-01-20 20:01:13,591 (trainer:737) INFO: 1epoch:train:701-800batch: iter_time=8.934e-04, forward_time=0.984, loss_ctc=320.784, loss_att=343.284, acc=2.966e-04, loss=336.534, backward_time=0.588, grad_norm=159.260, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.134, optim0_lr0=1.252e-06, train_time=3.236 +[gpub024:0/32] 2024-01-20 20:07:24,330 (trainer:737) INFO: 1epoch:train:801-900batch: iter_time=0.002, forward_time=1.235, loss_ctc=378.305, loss_att=402.403, acc=8.029e-04, loss=395.173, backward_time=0.639, grad_norm=201.389, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.120, optim0_lr0=1.419e-06, train_time=3.707 +[gpub024:0/32] 2024-01-20 20:13:14,564 (trainer:737) INFO: 1epoch:train:901-1000batch: iter_time=0.002, forward_time=1.350, loss_ctc=349.828, loss_att=372.383, acc=0.004, loss=365.616, backward_time=0.619, grad_norm=185.687, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.112, optim0_lr0=1.586e-06, train_time=3.500 +[gpub024:0/32] 2024-01-20 20:19:08,728 (trainer:737) INFO: 1epoch:train:1001-1100batch: iter_time=0.004, forward_time=1.051, loss_ctc=359.903, loss_att=385.543, acc=0.018, loss=377.851, backward_time=0.544, grad_norm=160.993, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.140, optim0_lr0=1.752e-06, train_time=3.541 +[gpub024:0/32] 2024-01-20 20:25:02,408 (trainer:737) INFO: 1epoch:train:1101-1200batch: iter_time=0.005, forward_time=1.262, loss_ctc=375.377, loss_att=388.217, acc=0.035, loss=384.365, backward_time=0.672, grad_norm=191.985, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.166, optim0_lr0=1.919e-06, train_time=3.537 +[gpub024:0/32] 2024-01-20 20:28:26,831 
(multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub024:0/32] 2024-01-20 20:28:45,839 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-20 20:28:49,381 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-20 20:28:49,382 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub024:0/32] 2024-01-20 20:28:49,385 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-20 20:41:25,726 (trainer:737) INFO: 1epoch:train:1201-1300batch: iter_time=6.545, forward_time=0.724, loss_ctc=348.215, loss_att=369.677, acc=0.050, loss=363.238, backward_time=0.501, grad_norm=161.461, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.108, optim0_lr0=2.086e-06, train_time=9.834 +[gpub024:0/32] 2024-01-20 20:45:38,679 (trainer:737) INFO: 1epoch:train:1301-1400batch: iter_time=9.093e-05, forward_time=0.325, loss_ctc=369.173, loss_att=417.840, acc=0.045, loss=403.240, backward_time=0.293, grad_norm=145.087, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.055, optim0_lr0=2.253e-06, train_time=2.528 +[gpub024:0/32] 2024-01-20 20:49:20,234 (trainer:737) INFO: 1epoch:train:1401-1500batch: iter_time=8.568e-05, forward_time=0.433, loss_ctc=356.292, loss_att=397.047, acc=0.043, loss=384.821, backward_time=0.360, grad_norm=154.715, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.077, optim0_lr0=2.419e-06, 
train_time=2.217 +[gpub024:0/32] 2024-01-20 20:52:29,993 (trainer:737) INFO: 1epoch:train:1501-1600batch: iter_time=9.064e-05, forward_time=0.467, loss_ctc=324.464, loss_att=363.911, acc=0.042, loss=352.077, backward_time=0.301, grad_norm=116.044, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.077, optim0_lr0=2.586e-06, train_time=1.897 +[gpub024:0/32] 2024-01-20 20:55:02,609 (trainer:737) INFO: 1epoch:train:1601-1700batch: iter_time=7.253e-04, forward_time=0.361, loss_ctc=349.119, loss_att=375.923, acc=0.044, loss=367.882, backward_time=0.351, grad_norm=135.732, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.080, optim0_lr0=2.752e-06, train_time=1.527 +[gpub024:0/32] 2024-01-20 20:57:19,221 (trainer:737) INFO: 1epoch:train:1701-1800batch: iter_time=8.372e-05, forward_time=0.415, loss_ctc=314.586, loss_att=345.320, acc=0.043, loss=336.100, backward_time=0.317, grad_norm=109.595, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.080, optim0_lr0=2.919e-06, train_time=1.363 +[gpub024:0/32] 2024-01-20 21:01:11,862 (trainer:737) INFO: 1epoch:train:1801-1900batch: iter_time=8.007e-04, forward_time=0.435, loss_ctc=324.254, loss_att=353.172, acc=0.055, loss=344.497, backward_time=0.265, grad_norm=138.683, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.079, optim0_lr0=3.086e-06, train_time=2.329 +[gpub024:0/32] 2024-01-20 21:03:41,235 (trainer:737) INFO: 1epoch:train:1901-2000batch: iter_time=4.039e-04, forward_time=0.258, loss_ctc=320.499, loss_att=352.307, acc=0.066, loss=342.764, backward_time=0.240, grad_norm=115.747, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.059, optim0_lr0=3.253e-06, train_time=1.491 +[gpub024:0/32] 2024-01-20 21:06:08,186 (trainer:737) INFO: 1epoch:train:2001-2100batch: iter_time=0.002, forward_time=0.366, loss_ctc=309.144, loss_att=334.432, acc=0.079, loss=326.846, backward_time=0.322, grad_norm=116.792, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.081, optim0_lr0=3.419e-06, train_time=1.471 
+[gpub024:0/32] 2024-01-20 21:08:25,008 (trainer:737) INFO: 1epoch:train:2101-2200batch: iter_time=2.762e-04, forward_time=0.192, loss_ctc=308.376, loss_att=327.875, acc=0.101, loss=322.025, backward_time=0.241, grad_norm=104.830, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.055, optim0_lr0=3.586e-06, train_time=1.368 +[gpub024:0/32] 2024-01-20 21:11:23,364 (trainer:737) INFO: 1epoch:train:2201-2300batch: iter_time=0.001, forward_time=0.414, loss_ctc=299.648, loss_att=327.446, acc=0.095, loss=319.107, backward_time=0.305, grad_norm=103.609, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.065, optim0_lr0=3.753e-06, train_time=1.782 +[gpub024:0/32] 2024-01-20 21:14:03,227 (trainer:737) INFO: 1epoch:train:2301-2400batch: iter_time=3.871e-04, forward_time=0.439, loss_ctc=330.507, loss_att=357.165, acc=0.091, loss=349.168, backward_time=0.306, grad_norm=129.013, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.066, optim0_lr0=3.919e-06, train_time=1.599 +[gpub024:0/32] 2024-01-20 21:16:44,191 (trainer:737) INFO: 1epoch:train:2401-2500batch: iter_time=8.307e-05, forward_time=0.342, loss_ctc=320.025, loss_att=339.437, acc=0.097, loss=333.613, backward_time=0.317, grad_norm=103.409, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.065, optim0_lr0=4.086e-06, train_time=1.611 +[gpub024:0/32] 2024-01-20 21:17:04,238 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub024:0/32] 2024-01-20 21:17:23,142 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-20 21:17:26,703 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-20 21:17:26,703 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub024:0/32] 2024-01-20 21:17:26,706 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-20 21:36:04,615 (trainer:737) INFO: 1epoch:train:2501-2600batch: iter_time=6.291, forward_time=0.342, loss_ctc=301.069, loss_att=310.948, acc=0.109, loss=307.984, backward_time=0.275, grad_norm=108.040, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.066, optim0_lr0=4.253e-06, train_time=11.604 +[gpub024:0/32] 2024-01-20 21:38:59,998 (trainer:737) INFO: 1epoch:train:2601-2700batch: iter_time=8.703e-05, forward_time=0.304, loss_ctc=299.043, loss_att=314.306, acc=0.105, loss=309.727, backward_time=0.294, grad_norm=101.463, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.055, optim0_lr0=4.419e-06, train_time=1.754 +[gpub024:0/32] 2024-01-20 21:43:01,894 (trainer:737) INFO: 1epoch:train:2701-2800batch: iter_time=8.701e-05, forward_time=0.197, loss_ctc=316.568, loss_att=334.264, acc=0.107, loss=328.955, backward_time=0.248, grad_norm=96.280, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.055, optim0_lr0=4.586e-06, train_time=2.419 +[gpub024:0/32] 2024-01-20 21:46:01,444 
(trainer:737) INFO: 1epoch:train:2801-2900batch: iter_time=8.414e-05, forward_time=0.441, loss_ctc=278.170, loss_att=285.617, acc=0.112, loss=283.383, backward_time=0.345, grad_norm=87.227, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.073, optim0_lr0=4.752e-06, train_time=1.795 +[gpub024:0/32] 2024-01-20 21:49:13,104 (trainer:737) INFO: 1epoch:train:2901-3000batch: iter_time=8.534e-05, forward_time=0.204, loss_ctc=299.573, loss_att=310.555, acc=0.106, loss=307.261, backward_time=0.226, grad_norm=100.655, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.050, optim0_lr0=4.919e-06, train_time=1.916 +[gpub024:0/32] 2024-01-20 21:52:25,305 (trainer:737) INFO: 1epoch:train:3001-3100batch: iter_time=8.473e-05, forward_time=0.347, loss_ctc=301.884, loss_att=306.786, acc=0.108, loss=305.315, backward_time=0.353, grad_norm=97.884, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.075, optim0_lr0=5.086e-06, train_time=1.922 +[gpub024:0/32] 2024-01-20 21:54:43,667 (trainer:737) INFO: 1epoch:train:3101-3200batch: iter_time=9.716e-05, forward_time=0.207, loss_ctc=290.382, loss_att=304.094, acc=0.116, loss=299.980, backward_time=0.260, grad_norm=105.457, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.050, optim0_lr0=5.252e-06, train_time=1.383 +[gpub024:0/32] 2024-01-20 21:57:53,427 (trainer:737) INFO: 1epoch:train:3201-3300batch: iter_time=9.168e-05, forward_time=0.229, loss_ctc=255.789, loss_att=266.257, acc=0.118, loss=263.117, backward_time=0.231, grad_norm=79.841, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.062, optim0_lr0=5.419e-06, train_time=1.898 +[gpub024:0/32] 2024-01-20 22:01:22,128 (trainer:737) INFO: 1epoch:train:3301-3400batch: iter_time=8.734e-05, forward_time=0.404, loss_ctc=302.288, loss_att=310.324, acc=0.111, loss=307.913, backward_time=0.344, grad_norm=106.725, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.073, optim0_lr0=5.586e-06, train_time=2.087 +[gpub024:0/32] 2024-01-20 22:03:43,082 (trainer:737) INFO: 
1epoch:train:3401-3500batch: iter_time=8.881e-05, forward_time=0.183, loss_ctc=279.487, loss_att=286.844, acc=0.111, loss=284.637, backward_time=0.224, grad_norm=95.334, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.052, optim0_lr0=5.753e-06, train_time=1.408 +[gpub024:0/32] 2024-01-20 22:06:50,874 (trainer:737) INFO: 1epoch:train:3501-3600batch: iter_time=0.150, forward_time=0.184, loss_ctc=283.928, loss_att=296.965, acc=0.111, loss=293.054, backward_time=0.246, grad_norm=93.639, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.056, optim0_lr0=5.919e-06, train_time=1.878 +[gpub024:0/32] 2024-01-20 22:10:19,461 (trainer:737) INFO: 1epoch:train:3601-3700batch: iter_time=8.749e-05, forward_time=0.449, loss_ctc=300.761, loss_att=301.375, acc=0.111, loss=301.191, backward_time=0.293, grad_norm=99.802, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.065, optim0_lr0=6.086e-06, train_time=2.087 +[gpub024:0/32] 2024-01-20 22:12:08,052 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub024:0/32] 2024-01-20 22:12:26,700 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-20 22:12:30,288 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-20 22:12:30,288 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub024:0/32] 2024-01-20 22:12:30,394 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-20 22:24:22,176 (trainer:737) INFO: 1epoch:train:3701-3800batch: iter_time=6.818, forward_time=0.171, loss_ctc=289.721, loss_att=289.438, acc=0.119, loss=289.523, backward_time=0.227, grad_norm=102.882, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.051, optim0_lr0=6.253e-06, train_time=8.427 +[gpub024:0/32] 2024-01-20 22:26:43,907 (trainer:737) INFO: 1epoch:train:3801-3900batch: iter_time=8.849e-05, forward_time=0.186, loss_ctc=300.682, loss_att=334.632, acc=0.105, loss=324.447, backward_time=0.230, grad_norm=100.017, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.051, optim0_lr0=6.419e-06, train_time=1.417 +[gpub024:0/32] 2024-01-20 22:29:28,883 (trainer:737) INFO: 1epoch:train:3901-4000batch: iter_time=9.533e-05, forward_time=0.389, loss_ctc=296.858, loss_att=317.228, acc=0.111, loss=311.117, backward_time=0.316, grad_norm=99.470, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=6.586e-06, train_time=1.650 +[gpub024:0/32] 2024-01-20 22:33:27,432 
(trainer:737) INFO: 1epoch:train:4001-4100batch: iter_time=9.802e-05, forward_time=0.229, loss_ctc=266.485, loss_att=291.302, acc=0.116, loss=283.857, backward_time=0.275, grad_norm=84.792, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=6.752e-06, train_time=2.385 +[gpub024:0/32] 2024-01-20 22:35:25,892 (trainer:737) INFO: 1epoch:train:4101-4200batch: iter_time=1.029e-04, forward_time=0.164, loss_ctc=298.556, loss_att=312.756, acc=0.116, loss=308.496, backward_time=0.226, grad_norm=100.008, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=6.919e-06, train_time=1.185 +[gpub024:0/32] 2024-01-20 22:37:19,155 (trainer:737) INFO: 1epoch:train:4201-4300batch: iter_time=9.227e-05, forward_time=0.182, loss_ctc=265.312, loss_att=290.080, acc=0.115, loss=282.649, backward_time=0.226, grad_norm=107.829, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=7.086e-06, train_time=1.132 +[gpub024:0/32] 2024-01-20 22:39:32,458 (trainer:737) INFO: 1epoch:train:4301-4400batch: iter_time=8.702e-05, forward_time=0.369, loss_ctc=286.412, loss_att=305.438, acc=0.125, loss=299.730, backward_time=0.313, grad_norm=104.624, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.078, optim0_lr0=7.253e-06, train_time=1.332 +[gpub024:0/32] 2024-01-20 22:41:53,325 (trainer:737) INFO: 1epoch:train:4401-4500batch: iter_time=8.774e-05, forward_time=0.211, loss_ctc=279.545, loss_att=305.048, acc=0.118, loss=297.397, backward_time=0.273, grad_norm=98.376, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=7.419e-06, train_time=1.409 +[gpub024:0/32] 2024-01-20 22:43:49,902 (trainer:737) INFO: 1epoch:train:4501-4600batch: iter_time=8.875e-05, forward_time=0.161, loss_ctc=277.268, loss_att=294.900, acc=0.125, loss=289.610, backward_time=0.229, grad_norm=100.884, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=7.586e-06, train_time=1.165 +[gpub024:0/32] 2024-01-20 22:47:57,064 (trainer:737) INFO: 
1epoch:train:4601-4700batch: iter_time=1.025e-04, forward_time=0.505, loss_ctc=281.201, loss_att=291.085, acc=0.126, loss=288.119, backward_time=0.315, grad_norm=103.691, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.078, optim0_lr0=7.753e-06, train_time=2.472 +[gpub024:0/32] 2024-01-20 22:50:01,404 (trainer:737) INFO: 1epoch:train:4701-4800batch: iter_time=9.262e-05, forward_time=0.181, loss_ctc=269.646, loss_att=291.312, acc=0.120, loss=284.812, backward_time=0.235, grad_norm=95.532, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=7.919e-06, train_time=1.243 +[gpub024:0/32] 2024-01-20 22:52:31,972 (trainer:737) INFO: 1epoch:train:4801-4900batch: iter_time=8.772e-05, forward_time=0.162, loss_ctc=300.142, loss_att=321.756, acc=0.117, loss=315.272, backward_time=0.225, grad_norm=116.778, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=8.086e-06, train_time=1.504 +[gpub024:0/32] 2024-01-20 22:54:31,936 (trainer:737) INFO: 1epoch:train:4901-5000batch: iter_time=2.798e-04, forward_time=0.246, loss_ctc=291.556, loss_att=305.632, acc=0.127, loss=301.410, backward_time=0.249, grad_norm=119.888, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.063, optim0_lr0=8.252e-06, train_time=1.201 +[gpub024:0/32] 2024-01-20 22:54:51,965 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub024:0/32] 2024-01-20 22:55:11,321 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-20 22:55:14,816 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-20 22:55:14,816 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub024:0/32] 2024-01-20 22:55:14,819 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-20 23:09:21,101 (trainer:737) INFO: 1epoch:train:5001-5100batch: iter_time=6.748, forward_time=0.163, loss_ctc=280.352, loss_att=288.171, acc=0.135, loss=285.825, backward_time=0.225, grad_norm=109.360, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.051, optim0_lr0=8.419e-06, train_time=8.891 +[gpub024:0/32] 2024-01-20 23:10:46,449 (trainer:737) INFO: 1epoch:train:5101-5200batch: iter_time=1.039e-04, forward_time=0.161, loss_ctc=281.041, loss_att=293.839, acc=0.120, loss=289.999, backward_time=0.228, grad_norm=113.574, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.051, optim0_lr0=8.586e-06, train_time=0.853 +[gpub024:0/32] 2024-01-20 23:14:22,646 (trainer:737) INFO: 1epoch:train:5201-5300batch: iter_time=9.378e-05, forward_time=0.250, loss_ctc=295.798, loss_att=309.933, acc=0.126, loss=305.692, backward_time=0.283, grad_norm=116.875, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.062, optim0_lr0=8.753e-06, train_time=2.162 +[gpub024:0/32] 2024-01-20 23:16:53,960 
(trainer:737) INFO: 1epoch:train:5301-5400batch: iter_time=4.429e-04, forward_time=0.304, loss_ctc=262.365, loss_att=268.039, acc=0.138, loss=266.337, backward_time=0.247, grad_norm=92.865, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.052, optim0_lr0=8.919e-06, train_time=1.513 +[gpub024:0/32] 2024-01-20 23:20:54,219 (trainer:737) INFO: 1epoch:train:5401-5500batch: iter_time=9.416e-05, forward_time=0.164, loss_ctc=285.595, loss_att=294.133, acc=0.130, loss=291.571, backward_time=0.224, grad_norm=115.563, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=9.086e-06, train_time=2.403 +[gpub024:0/32] 2024-01-20 23:22:37,100 (trainer:737) INFO: 1epoch:train:5501-5600batch: iter_time=9.800e-05, forward_time=0.161, loss_ctc=292.112, loss_att=293.890, acc=0.135, loss=293.357, backward_time=0.226, grad_norm=138.736, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.051, optim0_lr0=9.252e-06, train_time=1.029 +[gpub024:0/32] 2024-01-20 23:25:14,354 (trainer:737) INFO: 1epoch:train:5601-5700batch: iter_time=9.905e-05, forward_time=0.238, loss_ctc=281.139, loss_att=291.522, acc=0.134, loss=288.407, backward_time=0.231, grad_norm=120.229, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.056, optim0_lr0=9.419e-06, train_time=1.572 +[gpub024:0/32] 2024-01-20 23:27:32,104 (trainer:737) INFO: 1epoch:train:5701-5800batch: iter_time=9.042e-05, forward_time=0.160, loss_ctc=249.249, loss_att=256.116, acc=0.144, loss=254.056, backward_time=0.224, grad_norm=121.123, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.051, optim0_lr0=9.586e-06, train_time=1.377 +[gpub024:0/32] 2024-01-20 23:29:51,600 (trainer:737) INFO: 1epoch:train:5801-5900batch: iter_time=9.051e-05, forward_time=0.161, loss_ctc=293.006, loss_att=296.923, acc=0.136, loss=295.748, backward_time=0.225, grad_norm=128.273, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=9.753e-06, train_time=1.395 +[gpub024:0/32] 2024-01-20 23:32:09,015 (trainer:737) INFO: 
1epoch:train:5901-6000batch: iter_time=9.189e-05, forward_time=0.160, loss_ctc=272.142, loss_att=275.457, acc=0.137, loss=274.462, backward_time=0.226, grad_norm=125.595, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.050, optim0_lr0=9.919e-06, train_time=1.374 +[gpub024:0/32] 2024-01-20 23:34:29,290 (trainer:737) INFO: 1epoch:train:6001-6100batch: iter_time=9.056e-05, forward_time=0.328, loss_ctc=276.483, loss_att=282.505, acc=0.137, loss=280.699, backward_time=0.313, grad_norm=184.316, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.073, optim0_lr0=1.009e-05, train_time=1.402 +[gpub024:0/32] 2024-01-20 23:38:53,601 (trainer:737) INFO: 1epoch:train:6101-6200batch: iter_time=9.595e-05, forward_time=0.232, loss_ctc=293.494, loss_att=286.853, acc=0.140, loss=288.845, backward_time=0.247, grad_norm=150.799, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.050, optim0_lr0=1.025e-05, train_time=2.643 +[gpub024:0/32] 2024-01-20 23:40:24,738 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub024:0/32] 2024-01-20 23:40:44,202 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-20 23:40:47,822 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-20 23:40:47,822 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub024:0/32] 2024-01-20 23:40:47,829 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 00:01:51,031 (trainer:737) INFO: 1epoch:train:6201-6300batch: iter_time=6.556, forward_time=0.231, loss_ctc=285.457, loss_att=276.496, acc=0.144, loss=279.184, backward_time=0.229, grad_norm=122.960, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.054, optim0_lr0=1.042e-05, train_time=13.775 +[gpub024:0/32] 2024-01-21 00:03:47,863 (trainer:737) INFO: 1epoch:train:6301-6400batch: iter_time=7.840e-05, forward_time=0.161, loss_ctc=294.854, loss_att=299.251, acc=0.133, loss=297.932, backward_time=0.226, grad_norm=145.793, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.050, optim0_lr0=1.059e-05, train_time=1.168 +[gpub024:0/32] 2024-01-21 00:06:11,304 (trainer:737) INFO: 1epoch:train:6401-6500batch: iter_time=8.345e-05, forward_time=0.161, loss_ctc=287.621, loss_att=297.049, acc=0.138, loss=294.221, backward_time=0.226, grad_norm=156.545, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.075e-05, train_time=1.434 +[gpub024:0/32] 2024-01-21 00:08:43,939 
(trainer:737) INFO: 1epoch:train:6501-6600batch: iter_time=8.770e-05, forward_time=0.160, loss_ctc=259.478, loss_att=259.336, acc=0.146, loss=259.379, backward_time=0.226, grad_norm=104.308, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.092e-05, train_time=1.526 +[gpub024:0/32] 2024-01-21 00:10:41,126 (trainer:737) INFO: 1epoch:train:6601-6700batch: iter_time=8.612e-05, forward_time=0.161, loss_ctc=291.348, loss_att=285.394, acc=0.141, loss=287.180, backward_time=0.226, grad_norm=146.355, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.109e-05, train_time=1.172 +[gpub024:0/32] 2024-01-21 00:13:41,365 (trainer:737) INFO: 1epoch:train:6701-6800batch: iter_time=9.318e-05, forward_time=0.349, loss_ctc=259.395, loss_att=255.467, acc=0.144, loss=256.645, backward_time=0.295, grad_norm=132.878, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.067, optim0_lr0=1.125e-05, train_time=1.802 +[gpub024:0/32] 2024-01-21 00:15:23,595 (trainer:737) INFO: 1epoch:train:6801-6900batch: iter_time=9.421e-05, forward_time=0.165, loss_ctc=281.879, loss_att=279.304, acc=0.148, loss=280.077, backward_time=0.230, grad_norm=152.878, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.142e-05, train_time=1.022 +[gpub024:0/32] 2024-01-21 00:17:12,947 (trainer:737) INFO: 1epoch:train:6901-7000batch: iter_time=8.625e-05, forward_time=0.213, loss_ctc=275.708, loss_att=276.118, acc=0.142, loss=275.995, backward_time=0.230, grad_norm=149.103, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.058, optim0_lr0=1.159e-05, train_time=1.093 +[gpub024:0/32] 2024-01-21 00:19:05,440 (trainer:737) INFO: 1epoch:train:7001-7100batch: iter_time=8.654e-05, forward_time=0.161, loss_ctc=273.671, loss_att=268.499, acc=0.147, loss=270.050, backward_time=0.225, grad_norm=153.642, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.050, optim0_lr0=1.175e-05, train_time=1.125 +[gpub024:0/32] 2024-01-21 00:20:59,795 (trainer:737) INFO: 
1epoch:train:7101-7200batch: iter_time=9.776e-05, forward_time=0.161, loss_ctc=277.449, loss_att=269.663, acc=0.151, loss=271.999, backward_time=0.223, grad_norm=129.304, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.050, optim0_lr0=1.192e-05, train_time=1.143 +[gpub024:0/32] 2024-01-21 00:23:01,780 (trainer:737) INFO: 1epoch:train:7201-7300batch: iter_time=9.702e-05, forward_time=0.161, loss_ctc=264.548, loss_att=264.024, acc=0.147, loss=264.181, backward_time=0.225, grad_norm=128.648, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.209e-05, train_time=1.220 +[gpub024:0/32] 2024-01-21 00:25:09,228 (trainer:737) INFO: 1epoch:train:7301-7400batch: iter_time=8.854e-05, forward_time=0.162, loss_ctc=294.659, loss_att=280.789, acc=0.144, loss=284.950, backward_time=0.226, grad_norm=124.475, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.225e-05, train_time=1.274 +[gpub024:0/32] 2024-01-21 00:28:16,872 (trainer:737) INFO: 1epoch:train:7401-7500batch: iter_time=9.139e-05, forward_time=0.428, loss_ctc=288.436, loss_att=273.891, acc=0.144, loss=278.254, backward_time=0.319, grad_norm=166.369, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.068, optim0_lr0=1.242e-05, train_time=1.876 +[gpub024:0/32] 2024-01-21 00:28:36,900 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub024:0/32] 2024-01-21 00:28:55,574 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 00:28:59,087 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 00:28:59,087 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub024:0/32] 2024-01-21 00:28:59,248 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 00:40:21,220 (trainer:737) INFO: 1epoch:train:7501-7600batch: iter_time=6.127, forward_time=0.230, loss_ctc=277.883, loss_att=283.573, acc=0.154, loss=281.866, backward_time=0.230, grad_norm=121.577, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.053, optim0_lr0=1.259e-05, train_time=7.243 +[gpub024:0/32] 2024-01-21 00:42:24,844 (trainer:737) INFO: 1epoch:train:7601-7700batch: iter_time=8.879e-05, forward_time=0.162, loss_ctc=277.535, loss_att=286.191, acc=0.141, loss=283.594, backward_time=0.227, grad_norm=126.737, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.275e-05, train_time=1.236 +[gpub024:0/32] 2024-01-21 00:45:21,985 (trainer:737) INFO: 1epoch:train:7701-7800batch: iter_time=8.881e-05, forward_time=0.187, loss_ctc=289.913, loss_att=292.983, acc=0.148, loss=292.062, backward_time=0.225, grad_norm=121.086, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.051, optim0_lr0=1.292e-05, train_time=1.771 +[gpub024:0/32] 2024-01-21 00:47:55,822 
(trainer:737) INFO: 1epoch:train:7801-7900batch: iter_time=9.194e-05, forward_time=0.419, loss_ctc=257.546, loss_att=263.915, acc=0.155, loss=262.005, backward_time=0.319, grad_norm=111.737, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.071, optim0_lr0=1.309e-05, train_time=1.538 +[gpub024:0/32] 2024-01-21 00:50:18,405 (trainer:737) INFO: 1epoch:train:7901-8000batch: iter_time=9.613e-05, forward_time=0.203, loss_ctc=282.081, loss_att=286.559, acc=0.145, loss=285.216, backward_time=0.235, grad_norm=131.767, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.058, optim0_lr0=1.325e-05, train_time=1.425 +[gpub024:0/32] 2024-01-21 00:51:46,881 (trainer:737) INFO: 1epoch:train:8001-8100batch: iter_time=9.102e-05, forward_time=0.163, loss_ctc=288.654, loss_att=290.892, acc=0.150, loss=290.221, backward_time=0.229, grad_norm=161.444, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.051, optim0_lr0=1.342e-05, train_time=0.885 +[gpub024:0/32] 2024-01-21 00:54:42,155 (trainer:737) INFO: 1epoch:train:8101-8200batch: iter_time=8.625e-05, forward_time=0.161, loss_ctc=279.314, loss_att=279.520, acc=0.151, loss=279.458, backward_time=0.224, grad_norm=216.407, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.050, optim0_lr0=1.359e-05, train_time=1.753 +[gpub024:0/32] 2024-01-21 00:58:11,092 (trainer:737) INFO: 1epoch:train:8201-8300batch: iter_time=8.959e-05, forward_time=0.300, loss_ctc=243.958, loss_att=251.134, acc=0.160, loss=248.981, backward_time=0.295, grad_norm=127.653, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.066, optim0_lr0=1.375e-05, train_time=2.089 +[gpub024:0/32] 2024-01-21 00:59:59,771 (trainer:737) INFO: 1epoch:train:8301-8400batch: iter_time=8.893e-05, forward_time=0.320, loss_ctc=288.230, loss_att=282.975, acc=0.155, loss=284.552, backward_time=0.267, grad_norm=155.130, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.066, optim0_lr0=1.392e-05, train_time=1.087 +[gpub024:0/32] 2024-01-21 01:02:11,491 (trainer:737) INFO: 
1epoch:train:8401-8500batch: iter_time=8.713e-05, forward_time=0.161, loss_ctc=265.490, loss_att=264.901, acc=0.156, loss=265.077, backward_time=0.227, grad_norm=126.736, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.050, optim0_lr0=1.409e-05, train_time=1.315 +[gpub024:0/32] 2024-01-21 01:06:06,000 (trainer:737) INFO: 1epoch:train:8501-8600batch: iter_time=8.672e-05, forward_time=0.163, loss_ctc=273.922, loss_att=278.069, acc=0.152, loss=276.825, backward_time=0.225, grad_norm=141.164, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.050, optim0_lr0=1.425e-05, train_time=2.347 +[gpub024:0/32] 2024-01-21 01:07:53,740 (trainer:737) INFO: 1epoch:train:8601-8700batch: iter_time=9.219e-05, forward_time=0.185, loss_ctc=290.834, loss_att=281.552, acc=0.156, loss=284.337, backward_time=0.231, grad_norm=160.741, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.050, optim0_lr0=1.442e-05, train_time=1.078 +[gpub024:0/32] 2024-01-21 01:10:32,057 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub024:0/32] 2024-01-21 01:11:00,342 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 01:11:04,637 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 01:11:04,637 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub024:0/32] 2024-01-21 01:11:04,641 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 01:34:03,599 (trainer:737) INFO: 1epoch:train:8701-8800batch: iter_time=7.727, forward_time=0.397, loss_ctc=278.410, loss_att=262.287, acc=0.162, loss=267.124, backward_time=0.290, grad_norm=128.774, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.068, optim0_lr0=1.459e-05, train_time=15.698 +[gpub024:0/32] 2024-01-21 01:35:54,043 (trainer:737) INFO: 1epoch:train:8801-8900batch: iter_time=8.431e-05, forward_time=0.318, loss_ctc=289.419, loss_att=290.680, acc=0.149, loss=290.302, backward_time=0.289, grad_norm=131.349, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.068, optim0_lr0=1.475e-05, train_time=1.105 +[gpub024:0/32] 2024-01-21 01:38:57,268 (trainer:737) INFO: 1epoch:train:8901-9000batch: iter_time=8.220e-05, forward_time=0.374, loss_ctc=280.590, loss_att=280.118, acc=0.157, loss=280.259, backward_time=0.260, grad_norm=144.201, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.068, optim0_lr0=1.492e-05, train_time=1.831 +[gpub024:0/32] 2024-01-21 01:42:42,364 
(trainer:737) INFO: 1epoch:train:9001-9100batch: iter_time=8.654e-05, forward_time=0.426, loss_ctc=256.042, loss_att=253.245, acc=0.164, loss=254.084, backward_time=0.362, grad_norm=124.603, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.062, optim0_lr0=1.509e-05, train_time=2.252 +[gpub024:0/32] 2024-01-21 01:45:04,495 (trainer:737) INFO: 1epoch:train:9101-9200batch: iter_time=8.543e-05, forward_time=0.270, loss_ctc=286.072, loss_att=273.971, acc=0.160, loss=277.602, backward_time=0.264, grad_norm=141.570, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.064, optim0_lr0=1.525e-05, train_time=1.421 +[gpub024:0/32] 2024-01-21 01:47:44,070 (trainer:737) INFO: 1epoch:train:9201-9300batch: iter_time=8.352e-05, forward_time=0.337, loss_ctc=255.778, loss_att=249.880, acc=0.158, loss=251.649, backward_time=0.305, grad_norm=142.729, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.060, optim0_lr0=1.542e-05, train_time=1.596 +[gpub024:0/32] 2024-01-21 01:50:08,293 (trainer:737) INFO: 1epoch:train:9301-9400batch: iter_time=9.111e-05, forward_time=0.234, loss_ctc=277.610, loss_att=271.383, acc=0.167, loss=273.251, backward_time=0.255, grad_norm=175.939, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.064, optim0_lr0=1.559e-05, train_time=1.442 +[gpub024:0/32] 2024-01-21 01:54:10,177 (trainer:737) INFO: 1epoch:train:9401-9500batch: iter_time=9.213e-05, forward_time=0.397, loss_ctc=274.070, loss_att=270.063, acc=0.157, loss=271.265, backward_time=0.357, grad_norm=207.861, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.575e-05, train_time=2.417 +[gpub024:0/32] 2024-01-21 01:56:10,615 (trainer:737) INFO: 1epoch:train:9501-9600batch: iter_time=9.055e-05, forward_time=0.194, loss_ctc=268.234, loss_att=261.396, acc=0.165, loss=263.448, backward_time=0.263, grad_norm=135.097, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.061, optim0_lr0=1.592e-05, train_time=1.206 +[gpub024:0/32] 2024-01-21 01:59:07,664 (trainer:737) INFO: 
1epoch:train:9601-9700batch: iter_time=7.795e-04, forward_time=0.536, loss_ctc=272.750, loss_att=258.889, acc=0.168, loss=263.048, backward_time=0.329, grad_norm=163.494, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.064, optim0_lr0=1.609e-05, train_time=1.768 +[gpub024:0/32] 2024-01-21 02:01:58,862 (trainer:737) INFO: 1epoch:train:9701-9800batch: iter_time=8.461e-05, forward_time=0.189, loss_ctc=258.195, loss_att=253.960, acc=0.163, loss=255.231, backward_time=0.232, grad_norm=128.764, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.050, optim0_lr0=1.625e-05, train_time=1.714 +[gpub024:0/32] 2024-01-21 02:06:03,753 (trainer:737) INFO: 1epoch:train:9801-9900batch: iter_time=0.002, forward_time=0.458, loss_ctc=290.756, loss_att=281.045, acc=0.159, loss=283.958, backward_time=0.318, grad_norm=130.748, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.081, optim0_lr0=1.642e-05, train_time=2.449 +[gpub024:0/32] 2024-01-21 02:07:33,302 (trainer:737) INFO: 1epoch:train:9901-10000batch: iter_time=8.674e-05, forward_time=0.196, loss_ctc=283.997, loss_att=267.150, acc=0.166, loss=272.204, backward_time=0.254, grad_norm=154.014, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.051, optim0_lr0=1.659e-05, train_time=0.894 +[gpub024:0/32] 2024-01-21 02:07:53,641 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub024:0/32] 2024-01-21 02:08:12,273 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 02:08:15,869 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 02:08:15,869 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub024:0/32] 2024-01-21 02:08:15,872 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 02:21:07,091 (trainer:737) INFO: 1epoch:train:10001-10100batch: iter_time=6.605, forward_time=0.161, loss_ctc=273.675, loss_att=256.946, acc=0.169, loss=261.965, backward_time=0.226, grad_norm=133.749, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.675e-05, train_time=8.139 +[gpub024:0/32] 2024-01-21 02:23:15,058 (trainer:737) INFO: 1epoch:train:10101-10200batch: iter_time=8.829e-05, forward_time=0.161, loss_ctc=272.759, loss_att=259.320, acc=0.156, loss=263.352, backward_time=0.226, grad_norm=150.880, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.692e-05, train_time=1.279 +[gpub024:0/32] 2024-01-21 02:26:38,602 (trainer:737) INFO: 1epoch:train:10201-10300batch: iter_time=8.969e-05, forward_time=0.164, loss_ctc=286.823, loss_att=272.190, acc=0.164, loss=276.580, backward_time=0.226, grad_norm=138.208, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.709e-05, train_time=2.035 +[gpub024:0/32] 2024-01-21 02:28:59,593 
(trainer:737) INFO: 1epoch:train:10301-10400batch: iter_time=9.324e-05, forward_time=0.160, loss_ctc=251.876, loss_att=232.726, acc=0.172, loss=238.471, backward_time=0.224, grad_norm=129.368, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.725e-05, train_time=1.410 +[gpub024:0/32] 2024-01-21 02:31:53,662 (trainer:737) INFO: 1epoch:train:10401-10500batch: iter_time=8.964e-05, forward_time=0.312, loss_ctc=278.952, loss_att=258.765, acc=0.161, loss=264.821, backward_time=0.307, grad_norm=146.759, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.066, optim0_lr0=1.742e-05, train_time=1.740 +[gpub024:0/32] 2024-01-21 02:33:45,295 (trainer:737) INFO: 1epoch:train:10501-10600batch: iter_time=8.545e-05, forward_time=0.399, loss_ctc=282.027, loss_att=256.172, acc=0.168, loss=263.928, backward_time=0.249, grad_norm=166.966, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.759e-05, train_time=1.117 +[gpub024:0/32] 2024-01-21 02:37:17,762 (trainer:737) INFO: 1epoch:train:10601-10700batch: iter_time=8.880e-05, forward_time=0.161, loss_ctc=272.999, loss_att=256.802, acc=0.167, loss=261.661, backward_time=0.223, grad_norm=179.897, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.775e-05, train_time=2.122 +[gpub024:0/32] 2024-01-21 02:39:39,221 (trainer:737) INFO: 1epoch:train:10701-10800batch: iter_time=8.818e-05, forward_time=0.160, loss_ctc=239.982, loss_att=224.637, acc=0.178, loss=229.240, backward_time=0.225, grad_norm=126.363, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.792e-05, train_time=1.417 +[gpub024:0/32] 2024-01-21 02:41:42,389 (trainer:737) INFO: 1epoch:train:10801-10900batch: iter_time=9.326e-05, forward_time=0.161, loss_ctc=285.429, loss_att=261.823, acc=0.167, loss=268.905, backward_time=0.225, grad_norm=142.748, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.809e-05, train_time=1.231 +[gpub024:0/32] 2024-01-21 02:43:50,304 (trainer:737) 
INFO: 1epoch:train:10901-11000batch: iter_time=9.409e-05, forward_time=0.161, loss_ctc=260.227, loss_att=239.086, acc=0.174, loss=245.428, backward_time=0.225, grad_norm=115.044, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.825e-05, train_time=1.279 +[gpub024:0/32] 2024-01-21 02:45:31,853 (trainer:737) INFO: 1epoch:train:11001-11100batch: iter_time=8.988e-05, forward_time=0.161, loss_ctc=266.320, loss_att=247.667, acc=0.170, loss=253.263, backward_time=0.225, grad_norm=145.713, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.842e-05, train_time=1.015 +[gpub024:0/32] 2024-01-21 02:48:42,670 (trainer:737) INFO: 1epoch:train:11101-11200batch: iter_time=8.137e-05, forward_time=0.306, loss_ctc=285.860, loss_att=252.738, acc=0.176, loss=262.674, backward_time=0.288, grad_norm=167.609, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.073, optim0_lr0=1.859e-05, train_time=1.908 +[gpub024:0/32] 2024-01-21 02:49:52,391 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub024:0/32] 2024-01-21 02:50:11,504 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 02:50:15,128 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 02:50:15,128 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub024:0/32] 2024-01-21 02:50:15,227 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 03:02:27,821 (trainer:737) INFO: 1epoch:train:11201-11300batch: iter_time=6.832, forward_time=0.363, loss_ctc=277.302, loss_att=241.976, acc=0.181, loss=252.574, backward_time=0.254, grad_norm=154.550, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.060, optim0_lr0=1.875e-05, train_time=8.251 +[gpub024:0/32] 2024-01-21 03:03:56,469 (trainer:737) INFO: 1epoch:train:11301-11400batch: iter_time=7.928e-05, forward_time=0.162, loss_ctc=285.277, loss_att=260.747, acc=0.165, loss=268.106, backward_time=0.227, grad_norm=133.876, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.892e-05, train_time=0.887 +[gpub024:0/32] 2024-01-21 03:06:59,988 (trainer:737) INFO: 1epoch:train:11401-11500batch: iter_time=8.308e-05, forward_time=0.162, loss_ctc=277.671, loss_att=262.178, acc=0.172, loss=266.826, backward_time=0.227, grad_norm=132.603, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.909e-05, train_time=1.835 +[gpub024:0/32] 2024-01-21 03:09:52,471 
(trainer:737) INFO: 1epoch:train:11501-11600batch: iter_time=8.317e-05, forward_time=0.160, loss_ctc=250.428, loss_att=227.706, acc=0.182, loss=234.522, backward_time=0.223, grad_norm=105.524, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.050, optim0_lr0=1.925e-05, train_time=1.725 +[gpub024:0/32] 2024-01-21 03:13:05,110 (trainer:737) INFO: 1epoch:train:11601-11700batch: iter_time=9.427e-05, forward_time=0.161, loss_ctc=281.736, loss_att=252.370, acc=0.175, loss=261.179, backward_time=0.224, grad_norm=174.028, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.051, optim0_lr0=1.942e-05, train_time=1.926 +[gpub024:0/32] 2024-01-21 03:14:48,702 (trainer:737) INFO: 1epoch:train:11701-11800batch: iter_time=8.426e-05, forward_time=0.285, loss_ctc=252.192, loss_att=224.693, acc=0.176, loss=232.943, backward_time=0.298, grad_norm=137.661, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.059, optim0_lr0=1.959e-05, train_time=1.034 +[gpub024:0/32] 2024-01-21 03:18:18,883 (trainer:737) INFO: 1epoch:train:11801-11900batch: iter_time=8.260e-05, forward_time=0.213, loss_ctc=272.494, loss_att=247.291, acc=0.189, loss=254.852, backward_time=0.249, grad_norm=184.464, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.054, optim0_lr0=1.975e-05, train_time=2.103 +[gpub024:0/32] 2024-01-21 03:20:45,968 (trainer:737) INFO: 1epoch:train:11901-12000batch: iter_time=8.428e-05, forward_time=0.328, loss_ctc=266.780, loss_att=244.474, acc=0.174, loss=251.166, backward_time=0.235, grad_norm=170.144, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.064, optim0_lr0=1.992e-05, train_time=1.470 +[gpub024:0/32] 2024-01-21 03:24:31,243 (trainer:737) INFO: 1epoch:train:12001-12100batch: iter_time=9.120e-05, forward_time=0.163, loss_ctc=263.322, loss_att=238.195, acc=0.180, loss=245.733, backward_time=0.224, grad_norm=136.046, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.009e-05, train_time=2.253 +[gpub024:0/32] 2024-01-21 03:26:03,517 (trainer:737) 
INFO: 1epoch:train:12101-12200batch: iter_time=7.956e-05, forward_time=0.160, loss_ctc=268.671, loss_att=240.877, acc=0.184, loss=249.215, backward_time=0.225, grad_norm=132.638, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.025e-05, train_time=0.923 +[gpub024:0/32] 2024-01-21 03:27:58,533 (trainer:737) INFO: 1epoch:train:12201-12300batch: iter_time=7.989e-05, forward_time=0.160, loss_ctc=253.662, loss_att=234.867, acc=0.178, loss=240.505, backward_time=0.224, grad_norm=119.769, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.042e-05, train_time=1.150 +[gpub024:0/32] 2024-01-21 03:29:50,067 (trainer:737) INFO: 1epoch:train:12301-12400batch: iter_time=7.787e-05, forward_time=0.162, loss_ctc=286.632, loss_att=249.840, acc=0.176, loss=260.878, backward_time=0.228, grad_norm=127.209, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.059e-05, train_time=1.115 +[gpub024:0/32] 2024-01-21 03:31:24,231 (trainer:737) INFO: 1epoch:train:12401-12500batch: iter_time=8.204e-05, forward_time=0.216, loss_ctc=280.054, loss_att=244.005, acc=0.181, loss=254.820, backward_time=0.263, grad_norm=143.667, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.059, optim0_lr0=2.075e-05, train_time=0.941 +[gpub024:0/32] 2024-01-21 03:31:44,260 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub024:0/32] 2024-01-21 03:32:02,425 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 03:32:05,952 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 03:32:05,952 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub024:0/32] 2024-01-21 03:32:05,955 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 03:38:57,326 (trainer:737) INFO: 1epoch:train:12501-12600batch: iter_time=3.558, forward_time=0.209, loss_ctc=268.223, loss_att=251.450, acc=0.185, loss=256.482, backward_time=0.230, grad_norm=142.825, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.052, optim0_lr0=2.092e-05, train_time=4.529 +[gpub024:0/32] 2024-01-21 03:41:54,500 (trainer:737) INFO: 1epoch:train:12601-12700batch: iter_time=8.325e-05, forward_time=0.160, loss_ctc=265.980, loss_att=251.895, acc=0.169, loss=256.120, backward_time=0.224, grad_norm=125.970, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.109e-05, train_time=1.773 +[gpub024:0/32] 2024-01-21 03:44:31,962 (trainer:737) INFO: 1epoch:train:12701-12800batch: iter_time=7.995e-05, forward_time=0.162, loss_ctc=279.990, loss_att=261.107, acc=0.179, loss=266.772, backward_time=0.228, grad_norm=115.312, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.125e-05, train_time=1.574 +[gpub024:0/32] 2024-01-21 03:48:04,859 
(trainer:737) INFO: 1epoch:train:12801-12900batch: iter_time=8.525e-05, forward_time=0.161, loss_ctc=249.224, loss_att=234.362, acc=0.188, loss=238.821, backward_time=0.224, grad_norm=144.959, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.142e-05, train_time=2.129 +[gpub024:0/32] 2024-01-21 03:49:49,050 (trainer:737) INFO: 1epoch:train:12901-13000batch: iter_time=8.209e-05, forward_time=0.161, loss_ctc=274.109, loss_att=254.440, acc=0.174, loss=260.341, backward_time=0.227, grad_norm=160.819, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.159e-05, train_time=1.042 +[gpub024:0/32] 2024-01-21 03:54:23,570 (trainer:737) INFO: 1epoch:train:13001-13100batch: iter_time=8.262e-05, forward_time=0.296, loss_ctc=276.021, loss_att=255.708, acc=0.185, loss=261.802, backward_time=0.292, grad_norm=139.119, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.073, optim0_lr0=2.175e-05, train_time=2.745 +[gpub024:0/32] 2024-01-21 03:57:42,111 (trainer:737) INFO: 1epoch:train:13101-13200batch: iter_time=8.509e-05, forward_time=0.321, loss_ctc=267.229, loss_att=248.614, acc=0.185, loss=254.198, backward_time=0.259, grad_norm=174.767, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.059, optim0_lr0=2.192e-05, train_time=1.984 +[gpub024:0/32] 2024-01-21 04:00:38,500 (trainer:737) INFO: 1epoch:train:13201-13300batch: iter_time=8.217e-05, forward_time=0.202, loss_ctc=235.487, loss_att=221.467, acc=0.189, loss=225.673, backward_time=0.226, grad_norm=145.404, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.209e-05, train_time=1.764 +[gpub024:0/32] 2024-01-21 04:02:00,119 (trainer:737) INFO: 1epoch:train:13301-13400batch: iter_time=9.035e-05, forward_time=0.161, loss_ctc=281.851, loss_att=254.881, acc=0.179, loss=262.972, backward_time=0.228, grad_norm=139.862, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.225e-05, train_time=0.817 +[gpub024:0/32] 2024-01-21 04:04:33,597 (trainer:737) 
INFO: 1epoch:train:13401-13500batch: iter_time=8.976e-05, forward_time=0.161, loss_ctc=255.035, loss_att=235.240, acc=0.187, loss=241.178, backward_time=0.226, grad_norm=103.883, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.242e-05, train_time=1.535 +[gpub024:0/32] 2024-01-21 04:08:30,555 (trainer:737) INFO: 1epoch:train:13501-13600batch: iter_time=8.333e-05, forward_time=0.172, loss_ctc=263.305, loss_att=247.813, acc=0.179, loss=252.460, backward_time=0.269, grad_norm=125.528, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.058, optim0_lr0=2.259e-05, train_time=2.369 +[gpub024:0/32] 2024-01-21 04:11:22,592 (trainer:737) INFO: 1epoch:train:13601-13700batch: iter_time=2.879e-04, forward_time=0.307, loss_ctc=280.670, loss_att=252.997, acc=0.185, loss=261.299, backward_time=0.355, grad_norm=149.947, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.068, optim0_lr0=2.275e-05, train_time=1.720 +[gpub024:0/32] 2024-01-21 04:12:51,360 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub024:0/32] 2024-01-21 04:13:09,996 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 04:13:13,618 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 04:13:13,618 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub024:0/32] 2024-01-21 04:13:13,675 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 04:24:34,471 (trainer:737) INFO: 1epoch:train:13701-13800batch: iter_time=6.534, forward_time=0.272, loss_ctc=272.963, loss_att=239.046, acc=0.189, loss=249.221, backward_time=0.237, grad_norm=138.956, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.053, optim0_lr0=2.292e-05, train_time=7.917 +[gpub024:0/32] 2024-01-21 04:25:56,710 (trainer:737) INFO: 1epoch:train:13801-13900batch: iter_time=7.908e-05, forward_time=0.162, loss_ctc=282.201, loss_att=263.690, acc=0.172, loss=269.243, backward_time=0.228, grad_norm=115.834, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.309e-05, train_time=0.824 +[gpub024:0/32] 2024-01-21 04:28:35,165 (trainer:737) INFO: 1epoch:train:13901-14000batch: iter_time=8.137e-05, forward_time=0.161, loss_ctc=271.403, loss_att=253.846, acc=0.180, loss=259.113, backward_time=0.226, grad_norm=119.145, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.050, optim0_lr0=2.325e-05, train_time=1.584 +[gpub024:0/32] 2024-01-21 04:31:01,455 
(trainer:737) INFO: 1epoch:train:14001-14100batch: iter_time=8.272e-05, forward_time=0.161, loss_ctc=246.686, loss_att=228.488, acc=0.192, loss=233.947, backward_time=0.224, grad_norm=93.795, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.050, optim0_lr0=2.342e-05, train_time=1.463 +[gpub024:0/32] 2024-01-21 04:34:16,980 (trainer:737) INFO: 1epoch:train:14101-14200batch: iter_time=8.490e-05, forward_time=0.220, loss_ctc=277.616, loss_att=249.427, acc=0.186, loss=257.884, backward_time=0.237, grad_norm=149.300, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.058, optim0_lr0=2.359e-05, train_time=1.955 +[gpub024:0/32] 2024-01-21 04:37:14,736 (trainer:737) INFO: 1epoch:train:14201-14300batch: iter_time=0.007, forward_time=0.414, loss_ctc=246.626, loss_att=225.503, acc=0.184, loss=231.840, backward_time=0.315, grad_norm=136.346, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.074, optim0_lr0=2.375e-05, train_time=1.777 +[gpub024:0/32] 2024-01-21 04:39:59,694 (trainer:737) INFO: 1epoch:train:14301-14400batch: iter_time=8.136e-05, forward_time=0.164, loss_ctc=264.381, loss_att=244.665, acc=0.200, loss=250.579, backward_time=0.229, grad_norm=128.314, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.050, optim0_lr0=2.392e-05, train_time=1.649 +[gpub024:0/32] 2024-01-21 04:41:56,593 (trainer:737) INFO: 1epoch:train:14401-14500batch: iter_time=8.377e-05, forward_time=0.162, loss_ctc=262.655, loss_att=243.951, acc=0.182, loss=249.562, backward_time=0.227, grad_norm=150.819, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.050, optim0_lr0=2.409e-05, train_time=1.169 +[gpub024:0/32] 2024-01-21 04:44:41,922 (trainer:737) INFO: 1epoch:train:14501-14600batch: iter_time=8.189e-05, forward_time=0.161, loss_ctc=257.958, loss_att=235.954, acc=0.188, loss=242.555, backward_time=0.223, grad_norm=114.181, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.050, optim0_lr0=2.425e-05, train_time=1.654 +[gpub024:0/32] 2024-01-21 04:48:02,004 (trainer:737) 
INFO: 1epoch:train:14601-14700batch: iter_time=8.097e-05, forward_time=0.161, loss_ctc=265.878, loss_att=234.991, acc=0.192, loss=244.257, backward_time=0.224, grad_norm=125.507, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.051, optim0_lr0=2.442e-05, train_time=2.001 +[gpub024:0/32] 2024-01-21 04:49:32,776 (trainer:737) INFO: 1epoch:train:14701-14800batch: iter_time=8.562e-05, forward_time=0.161, loss_ctc=250.036, loss_att=232.270, acc=0.186, loss=237.600, backward_time=0.229, grad_norm=130.132, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.051, optim0_lr0=2.459e-05, train_time=0.907 +[gpub024:0/32] 2024-01-21 04:51:56,200 (trainer:737) INFO: 1epoch:train:14801-14900batch: iter_time=7.885e-05, forward_time=0.306, loss_ctc=282.079, loss_att=255.372, acc=0.180, loss=263.384, backward_time=0.303, grad_norm=121.399, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.066, optim0_lr0=2.475e-05, train_time=1.434 +[gpub024:0/32] 2024-01-21 04:55:21,942 (trainer:737) INFO: 1epoch:train:14901-15000batch: iter_time=8.552e-05, forward_time=0.308, loss_ctc=273.545, loss_att=244.581, acc=0.188, loss=253.270, backward_time=0.251, grad_norm=110.045, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.067, optim0_lr0=2.492e-05, train_time=2.057 +[gpub024:0/32] 2024-01-21 05:31:23,617 (trainer:343) INFO: 1epoch results: [train] iter_time=0.524, forward_time=0.359, loss_ctc=356.728, loss_att=287.104, acc=0.134, loss=307.991, backward_time=0.302, grad_norm=358.379, clip=100.000, loss_scale=1.669e+06, optim_step_time=0.068, optim0_lr0=1.250e-05, train_time=2.468, time=10 hours, 17 minutes and 19.89 seconds, total_count=15000, gpu_max_cached_mem_GB=24.412, [valid] loss_ctc=192.611, cer_ctc=0.981, loss_att=170.394, acc=0.159, cer=0.753, wer=1.000, loss=177.059, time=35 minutes and 37.51 seconds, total_count=4671, gpu_max_cached_mem_GB=24.412 +[gpub024:0/32] 2024-01-21 05:31:51,835 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count 
+[gpub024:0/32] 2024-01-21 05:31:51,970 (trainer:272) INFO: 2/45epoch started. Estimated time to finish: 2 weeks, 5 days and 23 hours +[gpub024:0/32] 2024-01-21 05:31:52,177 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub024:0/32] 2024-01-21 05:32:10,284 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 05:32:13,611 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 05:32:13,611 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub024:0/32] 2024-01-21 05:32:13,615 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 05:44:05,340 (trainer:737) INFO: 2epoch:train:1-100batch: iter_time=6.321, forward_time=0.277, loss_ctc=278.550, loss_att=243.434, acc=0.187, loss=253.969, backward_time=0.260, grad_norm=117.097, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.055, optim0_lr0=2.509e-05, train_time=7.332 +[gpub024:0/32] 2024-01-21 05:45:59,240 (trainer:737) INFO: 2epoch:train:101-200batch: iter_time=9.186e-05, forward_time=0.161, loss_ctc=255.487, loss_att=221.012, acc=0.197, loss=231.355, backward_time=0.229, grad_norm=116.468, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.051, optim0_lr0=2.525e-05, train_time=1.139 +[gpub024:0/32] 2024-01-21 05:48:09,893 (trainer:737) INFO: 2epoch:train:201-300batch: iter_time=9.197e-05, forward_time=0.216, loss_ctc=245.728, 
loss_att=216.154, acc=0.202, loss=225.026, backward_time=0.242, grad_norm=110.269, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.062, optim0_lr0=2.542e-05, train_time=1.306 +[gpub024:0/32] 2024-01-21 05:50:01,902 (trainer:737) INFO: 2epoch:train:301-400batch: iter_time=9.275e-05, forward_time=0.199, loss_ctc=260.214, loss_att=234.469, acc=0.185, loss=242.192, backward_time=0.261, grad_norm=94.800, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.061, optim0_lr0=2.559e-05, train_time=1.120 +[gpub024:0/32] 2024-01-21 05:52:40,201 (trainer:737) INFO: 2epoch:train:401-500batch: iter_time=8.907e-05, forward_time=0.224, loss_ctc=280.631, loss_att=236.690, acc=0.179, loss=249.872, backward_time=0.269, grad_norm=140.323, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.059, optim0_lr0=2.575e-05, train_time=1.583 +[gpub024:0/32] 2024-01-21 05:54:25,128 (trainer:737) INFO: 2epoch:train:501-600batch: iter_time=8.419e-05, forward_time=0.161, loss_ctc=244.359, loss_att=214.584, acc=0.194, loss=223.517, backward_time=0.224, grad_norm=113.378, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.051, optim0_lr0=2.592e-05, train_time=1.049 +[gpub024:0/32] 2024-01-21 05:57:29,351 (trainer:737) INFO: 2epoch:train:601-700batch: iter_time=9.334e-05, forward_time=0.196, loss_ctc=238.135, loss_att=206.846, acc=0.196, loss=216.233, backward_time=0.224, grad_norm=89.904, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.053, optim0_lr0=2.609e-05, train_time=1.842 +[gpub024:0/32] 2024-01-21 06:00:02,999 (trainer:737) INFO: 2epoch:train:701-800batch: iter_time=8.976e-05, forward_time=0.163, loss_ctc=229.877, loss_att=211.912, acc=0.199, loss=217.301, backward_time=0.230, grad_norm=96.500, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.051, optim0_lr0=2.625e-05, train_time=1.536 +[gpub024:0/32] 2024-01-21 06:01:57,144 (trainer:737) INFO: 2epoch:train:801-900batch: iter_time=9.453e-05, forward_time=0.226, loss_ctc=266.449, loss_att=226.756, acc=0.199, 
loss=238.664, backward_time=0.231, grad_norm=100.461, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.056, optim0_lr0=2.642e-05, train_time=1.141 +[gpub024:0/32] 2024-01-21 06:04:12,356 (trainer:737) INFO: 2epoch:train:901-1000batch: iter_time=9.299e-05, forward_time=0.305, loss_ctc=224.168, loss_att=200.005, acc=0.202, loss=207.254, backward_time=0.230, grad_norm=93.028, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.063, optim0_lr0=2.659e-05, train_time=1.352 +[gpub024:0/32] 2024-01-21 06:06:57,031 (trainer:737) INFO: 2epoch:train:1001-1100batch: iter_time=8.859e-05, forward_time=0.234, loss_ctc=258.210, loss_att=231.613, acc=0.185, loss=239.592, backward_time=0.302, grad_norm=111.040, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.061, optim0_lr0=2.675e-05, train_time=1.646 +[gpub024:0/32] 2024-01-21 06:08:46,365 (trainer:737) INFO: 2epoch:train:1101-1200batch: iter_time=9.132e-05, forward_time=0.170, loss_ctc=203.086, loss_att=180.971, acc=0.207, loss=187.605, backward_time=0.223, grad_norm=75.434, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.692e-05, train_time=1.093 +[gpub024:0/32] 2024-01-21 06:10:08,731 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub024:0/32] 2024-01-21 06:10:27,745 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 06:10:31,286 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 06:10:31,286 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub024:0/32] 2024-01-21 06:10:31,310 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 06:21:41,365 (trainer:737) INFO: 2epoch:train:1201-1300batch: iter_time=6.441, forward_time=0.202, loss_ctc=247.800, loss_att=211.359, acc=0.209, loss=222.291, backward_time=0.229, grad_norm=103.662, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.052, optim0_lr0=2.709e-05, train_time=7.750 +[gpub024:0/32] 2024-01-21 06:23:36,667 (trainer:737) INFO: 2epoch:train:1301-1400batch: iter_time=8.103e-05, forward_time=0.194, loss_ctc=294.381, loss_att=252.861, acc=0.184, loss=265.317, backward_time=0.226, grad_norm=115.322, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.063, optim0_lr0=2.725e-05, train_time=1.153 +[gpub024:0/32] 2024-01-21 06:25:32,387 (trainer:737) INFO: 2epoch:train:1401-1500batch: iter_time=8.503e-05, forward_time=0.213, loss_ctc=238.791, loss_att=207.607, acc=0.207, loss=216.962, backward_time=0.261, grad_norm=118.804, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.058, optim0_lr0=2.742e-05, train_time=1.157 +[gpub024:0/32] 2024-01-21 06:27:05,366 
(trainer:737) INFO: 2epoch:train:1501-1600batch: iter_time=8.172e-05, forward_time=0.163, loss_ctc=231.221, loss_att=206.621, acc=0.203, loss=214.001, backward_time=0.227, grad_norm=85.674, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.759e-05, train_time=0.930 +[gpub024:0/32] 2024-01-21 06:29:10,253 (trainer:737) INFO: 2epoch:train:1601-1700batch: iter_time=9.047e-05, forward_time=0.181, loss_ctc=287.987, loss_att=246.891, acc=0.176, loss=259.220, backward_time=0.229, grad_norm=145.131, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.056, optim0_lr0=2.775e-05, train_time=1.249 +[gpub024:0/32] 2024-01-21 06:31:21,717 (trainer:737) INFO: 2epoch:train:1701-1800batch: iter_time=8.620e-05, forward_time=0.166, loss_ctc=270.589, loss_att=231.940, acc=0.186, loss=243.534, backward_time=0.226, grad_norm=120.163, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.792e-05, train_time=1.314 +[gpub024:0/32] 2024-01-21 06:33:51,629 (trainer:737) INFO: 2epoch:train:1801-1900batch: iter_time=9.134e-05, forward_time=0.189, loss_ctc=200.891, loss_att=178.210, acc=0.213, loss=185.015, backward_time=0.275, grad_norm=86.129, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.057, optim0_lr0=2.809e-05, train_time=1.499 +[gpub024:0/32] 2024-01-21 06:36:06,096 (trainer:737) INFO: 2epoch:train:1901-2000batch: iter_time=8.785e-05, forward_time=0.173, loss_ctc=261.200, loss_att=230.601, acc=0.193, loss=239.781, backward_time=0.232, grad_norm=98.514, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.072, optim0_lr0=2.825e-05, train_time=1.345 +[gpub024:0/32] 2024-01-21 06:37:56,697 (trainer:737) INFO: 2epoch:train:2001-2100batch: iter_time=9.503e-05, forward_time=0.338, loss_ctc=241.390, loss_att=213.947, acc=0.202, loss=222.180, backward_time=0.274, grad_norm=80.923, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.061, optim0_lr0=2.842e-05, train_time=1.106 +[gpub024:0/32] 2024-01-21 06:39:35,880 (trainer:737) INFO: 
2epoch:train:2101-2200batch: iter_time=9.507e-05, forward_time=0.162, loss_ctc=245.696, loss_att=213.313, acc=0.204, loss=223.028, backward_time=0.226, grad_norm=104.165, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.859e-05, train_time=0.992 +[gpub024:0/32] 2024-01-21 06:41:22,762 (trainer:737) INFO: 2epoch:train:2201-2300batch: iter_time=1.042e-04, forward_time=0.161, loss_ctc=223.664, loss_att=195.760, acc=0.201, loss=204.131, backward_time=0.224, grad_norm=87.140, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.875e-05, train_time=1.069 +[gpub024:0/32] 2024-01-21 06:43:49,364 (trainer:737) INFO: 2epoch:train:2301-2400batch: iter_time=9.250e-05, forward_time=0.162, loss_ctc=234.589, loss_att=209.921, acc=0.201, loss=217.321, backward_time=0.236, grad_norm=96.139, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.892e-05, train_time=1.466 +[gpub024:0/32] 2024-01-21 06:45:47,764 (trainer:737) INFO: 2epoch:train:2401-2500batch: iter_time=8.881e-05, forward_time=0.161, loss_ctc=225.439, loss_att=196.216, acc=0.208, loss=204.983, backward_time=0.224, grad_norm=88.570, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.909e-05, train_time=1.184 +[gpub024:0/32] 2024-01-21 06:46:07,828 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub024:0/32] 2024-01-21 06:46:26,384 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 06:46:30,050 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 06:46:30,050 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub024:0/32] 2024-01-21 06:46:30,053 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 06:56:40,674 (trainer:737) INFO: 2epoch:train:2501-2600batch: iter_time=5.503, forward_time=0.301, loss_ctc=268.540, loss_att=243.815, acc=0.194, loss=251.232, backward_time=0.261, grad_norm=103.385, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.055, optim0_lr0=2.925e-05, train_time=6.529 +[gpub024:0/32] 2024-01-21 06:58:44,102 (trainer:737) INFO: 2epoch:train:2601-2700batch: iter_time=8.341e-05, forward_time=0.161, loss_ctc=246.805, loss_att=217.914, acc=0.203, loss=226.582, backward_time=0.225, grad_norm=100.753, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.942e-05, train_time=1.234 +[gpub024:0/32] 2024-01-21 07:01:15,350 (trainer:737) INFO: 2epoch:train:2701-2800batch: iter_time=8.570e-05, forward_time=0.172, loss_ctc=240.234, loss_att=210.714, acc=0.209, loss=219.570, backward_time=0.238, grad_norm=101.011, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.052, optim0_lr0=2.959e-05, train_time=1.512 +[gpub024:0/32] 2024-01-21 07:04:01,118 
(trainer:737) INFO: 2epoch:train:2801-2900batch: iter_time=9.133e-05, forward_time=0.178, loss_ctc=251.982, loss_att=234.885, acc=0.191, loss=240.015, backward_time=0.252, grad_norm=88.250, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.053, optim0_lr0=2.975e-05, train_time=1.657 +[gpub024:0/32] 2024-01-21 07:05:50,920 (trainer:737) INFO: 2epoch:train:2901-3000batch: iter_time=9.407e-05, forward_time=0.163, loss_ctc=275.032, loss_att=237.947, acc=0.184, loss=249.072, backward_time=0.226, grad_norm=147.593, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.051, optim0_lr0=2.992e-05, train_time=1.098 +[gpub024:0/32] 2024-01-21 07:07:29,752 (trainer:737) INFO: 2epoch:train:3001-3100batch: iter_time=8.584e-05, forward_time=0.248, loss_ctc=238.163, loss_att=212.849, acc=0.198, loss=220.443, backward_time=0.241, grad_norm=117.457, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.068, optim0_lr0=3.009e-05, train_time=0.988 +[gpub024:0/32] 2024-01-21 07:10:31,389 (trainer:737) INFO: 2epoch:train:3101-3200batch: iter_time=8.934e-05, forward_time=0.185, loss_ctc=234.672, loss_att=204.356, acc=0.202, loss=213.451, backward_time=0.255, grad_norm=92.657, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.053, optim0_lr0=3.025e-05, train_time=1.816 +[gpub024:0/32] 2024-01-21 07:13:15,922 (trainer:737) INFO: 2epoch:train:3201-3300batch: iter_time=9.161e-05, forward_time=0.232, loss_ctc=226.264, loss_att=210.248, acc=0.204, loss=215.053, backward_time=0.283, grad_norm=86.351, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.057, optim0_lr0=3.042e-05, train_time=1.645 +[gpub024:0/32] 2024-01-21 07:15:41,035 (trainer:737) INFO: 2epoch:train:3301-3400batch: iter_time=8.052e-05, forward_time=0.237, loss_ctc=259.525, loss_att=224.990, acc=0.204, loss=235.351, backward_time=0.226, grad_norm=82.395, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.056, optim0_lr0=3.059e-05, train_time=1.451 +[gpub024:0/32] 2024-01-21 07:18:07,983 (trainer:737) INFO: 
2epoch:train:3401-3500batch: iter_time=8.787e-05, forward_time=0.163, loss_ctc=218.331, loss_att=197.243, acc=0.207, loss=203.569, backward_time=0.224, grad_norm=84.428, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.075e-05, train_time=1.469 +[gpub024:0/32] 2024-01-21 07:20:04,372 (trainer:737) INFO: 2epoch:train:3501-3600batch: iter_time=9.025e-05, forward_time=0.172, loss_ctc=252.357, loss_att=233.969, acc=0.190, loss=239.485, backward_time=0.249, grad_norm=95.587, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.092e-05, train_time=1.164 +[gpub024:0/32] 2024-01-21 07:21:44,682 (trainer:737) INFO: 2epoch:train:3601-3700batch: iter_time=9.147e-05, forward_time=0.168, loss_ctc=197.695, loss_att=176.588, acc=0.215, loss=182.920, backward_time=0.227, grad_norm=68.013, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.053, optim0_lr0=3.109e-05, train_time=1.003 +[gpub024:0/32] 2024-01-21 07:22:52,553 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub024:0/32] 2024-01-21 07:23:11,899 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 07:23:15,679 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 07:23:15,679 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub024:0/32] 2024-01-21 07:23:15,685 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 07:34:19,606 (trainer:737) INFO: 2epoch:train:3701-3800batch: iter_time=6.472, forward_time=0.310, loss_ctc=241.358, loss_att=207.379, acc=0.215, loss=217.573, backward_time=0.251, grad_norm=91.132, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.060, optim0_lr0=3.125e-05, train_time=7.549 +[gpub024:0/32] 2024-01-21 07:35:43,509 (trainer:737) INFO: 2epoch:train:3801-3900batch: iter_time=8.507e-05, forward_time=0.163, loss_ctc=288.381, loss_att=249.592, acc=0.190, loss=261.229, backward_time=0.228, grad_norm=112.420, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.142e-05, train_time=0.839 +[gpub024:0/32] 2024-01-21 07:37:09,667 (trainer:737) INFO: 2epoch:train:3901-4000batch: iter_time=8.449e-05, forward_time=0.161, loss_ctc=230.682, loss_att=200.532, acc=0.214, loss=209.577, backward_time=0.227, grad_norm=87.220, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.159e-05, train_time=0.861 +[gpub024:0/32] 2024-01-21 07:39:41,474 (trainer:737) 
INFO: 2epoch:train:4001-4100batch: iter_time=8.844e-05, forward_time=0.215, loss_ctc=223.215, loss_att=199.348, acc=0.211, loss=206.508, backward_time=0.296, grad_norm=62.646, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.059, optim0_lr0=3.175e-05, train_time=1.518 +[gpub024:0/32] 2024-01-21 07:42:08,432 (trainer:737) INFO: 2epoch:train:4101-4200batch: iter_time=8.122e-05, forward_time=0.168, loss_ctc=277.420, loss_att=240.075, acc=0.184, loss=251.278, backward_time=0.226, grad_norm=116.529, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.192e-05, train_time=1.469 +[gpub024:0/32] 2024-01-21 07:44:11,007 (trainer:737) INFO: 2epoch:train:4201-4300batch: iter_time=8.398e-05, forward_time=0.199, loss_ctc=263.705, loss_att=225.890, acc=0.194, loss=237.235, backward_time=0.303, grad_norm=121.672, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.061, optim0_lr0=3.209e-05, train_time=1.225 +[gpub024:0/32] 2024-01-21 07:46:15,888 (trainer:737) INFO: 2epoch:train:4301-4400batch: iter_time=8.033e-05, forward_time=0.160, loss_ctc=196.307, loss_att=174.026, acc=0.218, loss=180.710, backward_time=0.223, grad_norm=82.089, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.225e-05, train_time=1.249 +[gpub024:0/32] 2024-01-21 07:48:21,008 (trainer:737) INFO: 2epoch:train:4401-4500batch: iter_time=8.508e-05, forward_time=0.161, loss_ctc=255.716, loss_att=224.966, acc=0.197, loss=234.191, backward_time=0.226, grad_norm=105.065, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.242e-05, train_time=1.251 +[gpub024:0/32] 2024-01-21 07:50:51,488 (trainer:737) INFO: 2epoch:train:4501-4600batch: iter_time=8.319e-05, forward_time=0.196, loss_ctc=235.227, loss_att=208.637, acc=0.207, loss=216.614, backward_time=0.224, grad_norm=64.723, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.051, optim0_lr0=3.259e-05, train_time=1.505 +[gpub024:0/32] 2024-01-21 07:52:27,390 (trainer:737) INFO: 
2epoch:train:4601-4700batch: iter_time=8.582e-05, forward_time=0.220, loss_ctc=239.126, loss_att=207.929, acc=0.210, loss=217.288, backward_time=0.236, grad_norm=84.245, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.055, optim0_lr0=3.275e-05, train_time=0.959 +[gpub024:0/32] 2024-01-21 07:54:24,285 (trainer:737) INFO: 2epoch:train:4701-4800batch: iter_time=8.323e-05, forward_time=0.248, loss_ctc=216.670, loss_att=190.129, acc=0.208, loss=198.091, backward_time=0.236, grad_norm=71.871, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.055, optim0_lr0=3.292e-05, train_time=1.169 +[gpub024:0/32] 2024-01-21 07:57:10,992 (trainer:737) INFO: 2epoch:train:4801-4900batch: iter_time=8.758e-05, forward_time=0.259, loss_ctc=229.003, loss_att=204.945, acc=0.205, loss=212.163, backward_time=0.243, grad_norm=79.685, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.069, optim0_lr0=3.309e-05, train_time=1.667 +[gpub024:0/32] 2024-01-21 07:59:45,528 (trainer:737) INFO: 2epoch:train:4901-5000batch: iter_time=8.339e-05, forward_time=0.185, loss_ctc=219.063, loss_att=190.733, acc=0.215, loss=199.232, backward_time=0.263, grad_norm=84.716, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.052, optim0_lr0=3.325e-05, train_time=1.545 +[gpub024:0/32] 2024-01-21 08:00:05,807 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub024:0/32] 2024-01-21 08:00:24,647 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 08:00:28,295 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 08:00:28,295 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub024:0/32] 2024-01-21 08:00:28,451 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 08:12:21,330 (trainer:737) INFO: 2epoch:train:5001-5100batch: iter_time=6.579, forward_time=0.199, loss_ctc=262.118, loss_att=238.490, acc=0.199, loss=245.578, backward_time=0.231, grad_norm=91.844, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.052, optim0_lr0=3.342e-05, train_time=7.558 +[gpub024:0/32] 2024-01-21 08:13:41,639 (trainer:737) INFO: 2epoch:train:5101-5200batch: iter_time=8.684e-05, forward_time=0.161, loss_ctc=237.366, loss_att=211.685, acc=0.210, loss=219.389, backward_time=0.228, grad_norm=79.965, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.359e-05, train_time=0.803 +[gpub024:0/32] 2024-01-21 08:15:02,405 (trainer:737) INFO: 2epoch:train:5201-5300batch: iter_time=8.781e-05, forward_time=0.162, loss_ctc=232.605, loss_att=204.885, acc=0.216, loss=213.201, backward_time=0.227, grad_norm=77.109, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.375e-05, train_time=0.807 +[gpub024:0/32] 2024-01-21 08:17:33,556 (trainer:737) 
INFO: 2epoch:train:5301-5400batch: iter_time=9.406e-05, forward_time=0.261, loss_ctc=244.863, loss_att=230.471, acc=0.196, loss=234.789, backward_time=0.245, grad_norm=69.615, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.060, optim0_lr0=3.392e-05, train_time=1.511 +[gpub024:0/32] 2024-01-21 08:20:05,462 (trainer:737) INFO: 2epoch:train:5401-5500batch: iter_time=8.910e-05, forward_time=0.162, loss_ctc=265.929, loss_att=232.565, acc=0.192, loss=242.574, backward_time=0.229, grad_norm=109.310, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.409e-05, train_time=1.519 +[gpub024:0/32] 2024-01-21 08:23:04,770 (trainer:737) INFO: 2epoch:train:5501-5600batch: iter_time=8.741e-05, forward_time=0.375, loss_ctc=228.963, loss_att=206.505, acc=0.206, loss=213.242, backward_time=0.266, grad_norm=119.623, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.057, optim0_lr0=3.425e-05, train_time=1.793 +[gpub024:0/32] 2024-01-21 08:25:16,783 (trainer:737) INFO: 2epoch:train:5601-5700batch: iter_time=9.252e-05, forward_time=0.208, loss_ctc=223.296, loss_att=198.104, acc=0.210, loss=205.662, backward_time=0.239, grad_norm=79.582, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.053, optim0_lr0=3.442e-05, train_time=1.320 +[gpub024:0/32] 2024-01-21 08:27:52,758 (trainer:737) INFO: 2epoch:train:5701-5800batch: iter_time=9.247e-05, forward_time=0.161, loss_ctc=217.604, loss_att=204.479, acc=0.211, loss=208.417, backward_time=0.225, grad_norm=69.104, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.459e-05, train_time=1.560 +[gpub024:0/32] 2024-01-21 08:29:21,767 (trainer:737) INFO: 2epoch:train:5801-5900batch: iter_time=8.738e-05, forward_time=0.198, loss_ctc=247.443, loss_att=218.163, acc=0.211, loss=226.947, backward_time=0.229, grad_norm=70.090, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.475e-05, train_time=0.890 +[gpub024:0/32] 2024-01-21 08:31:23,160 (trainer:737) INFO: 
2epoch:train:5901-6000batch: iter_time=9.375e-05, forward_time=0.174, loss_ctc=208.974, loss_att=192.924, acc=0.213, loss=197.739, backward_time=0.231, grad_norm=70.760, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.052, optim0_lr0=3.492e-05, train_time=1.214 +[gpub024:0/32] 2024-01-21 08:34:14,397 (trainer:737) INFO: 2epoch:train:6001-6100batch: iter_time=8.782e-05, forward_time=0.260, loss_ctc=242.242, loss_att=228.397, acc=0.197, loss=232.550, backward_time=0.242, grad_norm=80.398, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.056, optim0_lr0=3.509e-05, train_time=1.712 +[gpub024:0/32] 2024-01-21 08:36:47,875 (trainer:737) INFO: 2epoch:train:6101-6200batch: iter_time=2.561e-04, forward_time=0.323, loss_ctc=188.975, loss_att=172.481, acc=0.223, loss=177.429, backward_time=0.256, grad_norm=59.407, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.058, optim0_lr0=3.525e-05, train_time=1.534 +[gpub024:0/32] 2024-01-21 08:38:51,607 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub024:0/32] 2024-01-21 08:39:10,542 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 08:39:14,257 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 08:39:14,257 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub024:0/32] 2024-01-21 08:39:14,262 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 08:49:44,113 (trainer:737) INFO: 2epoch:train:6201-6300batch: iter_time=5.987, forward_time=0.230, loss_ctc=228.740, loss_att=204.508, acc=0.221, loss=211.778, backward_time=0.231, grad_norm=83.068, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.053, optim0_lr0=3.542e-05, train_time=7.762 +[gpub024:0/32] 2024-01-21 08:51:05,776 (trainer:737) INFO: 2epoch:train:6301-6400batch: iter_time=8.207e-05, forward_time=0.162, loss_ctc=271.393, loss_att=248.959, acc=0.196, loss=255.689, backward_time=0.229, grad_norm=90.159, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.559e-05, train_time=0.817 +[gpub024:0/32] 2024-01-21 08:52:30,426 (trainer:737) INFO: 2epoch:train:6401-6500batch: iter_time=8.196e-05, forward_time=0.161, loss_ctc=218.958, loss_att=196.417, acc=0.220, loss=203.179, backward_time=0.228, grad_norm=76.215, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.575e-05, train_time=0.846 +[gpub024:0/32] 2024-01-21 08:54:51,499 (trainer:737) 
INFO: 2epoch:train:6501-6600batch: iter_time=9.141e-05, forward_time=0.272, loss_ctc=210.582, loss_att=197.155, acc=0.219, loss=201.183, backward_time=0.250, grad_norm=59.152, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.066, optim0_lr0=3.592e-05, train_time=1.411 +[gpub024:0/32] 2024-01-21 08:57:42,416 (trainer:737) INFO: 2epoch:train:6601-6700batch: iter_time=9.791e-05, forward_time=0.254, loss_ctc=260.506, loss_att=242.017, acc=0.190, loss=247.564, backward_time=0.292, grad_norm=90.511, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.061, optim0_lr0=3.609e-05, train_time=1.709 +[gpub024:0/32] 2024-01-21 09:00:00,617 (trainer:737) INFO: 2epoch:train:6701-6800batch: iter_time=9.461e-05, forward_time=0.216, loss_ctc=248.125, loss_att=226.360, acc=0.200, loss=232.889, backward_time=0.236, grad_norm=119.333, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.625e-05, train_time=1.381 +[gpub024:0/32] 2024-01-21 09:03:00,907 (trainer:737) INFO: 2epoch:train:6801-6900batch: iter_time=9.318e-05, forward_time=0.198, loss_ctc=184.510, loss_att=169.757, acc=0.227, loss=174.183, backward_time=0.243, grad_norm=79.053, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.056, optim0_lr0=3.642e-05, train_time=1.803 +[gpub024:0/32] 2024-01-21 09:05:27,883 (trainer:737) INFO: 2epoch:train:6901-7000batch: iter_time=9.256e-05, forward_time=0.163, loss_ctc=238.134, loss_att=223.327, acc=0.204, loss=227.769, backward_time=0.226, grad_norm=83.362, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.051, optim0_lr0=3.659e-05, train_time=1.470 +[gpub024:0/32] 2024-01-21 09:07:07,969 (trainer:737) INFO: 2epoch:train:7001-7100batch: iter_time=9.336e-05, forward_time=0.307, loss_ctc=216.896, loss_att=207.462, acc=0.214, loss=210.292, backward_time=0.239, grad_norm=60.200, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.068, optim0_lr0=3.675e-05, train_time=1.001 +[gpub024:0/32] 2024-01-21 09:09:40,290 (trainer:737) INFO: 
2epoch:train:7101-7200batch: iter_time=9.464e-05, forward_time=0.284, loss_ctc=220.562, loss_att=205.600, acc=0.214, loss=210.089, backward_time=0.279, grad_norm=74.969, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.072, optim0_lr0=3.692e-05, train_time=1.523 +[gpub024:0/32] 2024-01-21 09:11:57,556 (trainer:737) INFO: 2epoch:train:7201-7300batch: iter_time=9.826e-05, forward_time=0.210, loss_ctc=202.133, loss_att=189.144, acc=0.214, loss=193.040, backward_time=0.225, grad_norm=69.761, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.709e-05, train_time=1.373 +[gpub024:0/32] 2024-01-21 09:14:55,274 (trainer:737) INFO: 2epoch:train:7301-7400batch: iter_time=1.023e-04, forward_time=0.160, loss_ctc=211.126, loss_att=202.855, acc=0.213, loss=205.337, backward_time=0.224, grad_norm=66.765, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.725e-05, train_time=1.777 +[gpub024:0/32] 2024-01-21 09:17:02,278 (trainer:737) INFO: 2epoch:train:7401-7500batch: iter_time=9.630e-05, forward_time=0.160, loss_ctc=199.691, loss_att=188.392, acc=0.222, loss=191.782, backward_time=0.225, grad_norm=86.864, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.742e-05, train_time=1.270 +[gpub024:0/32] 2024-01-21 09:17:22,307 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub024:0/32] 2024-01-21 09:17:41,572 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 09:17:45,223 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 09:17:45,223 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub024:0/32] 2024-01-21 09:17:45,249 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 09:28:51,163 (trainer:737) INFO: 2epoch:train:7501-7600batch: iter_time=5.842, forward_time=0.196, loss_ctc=239.677, loss_att=224.630, acc=0.206, loss=229.144, backward_time=0.230, grad_norm=82.967, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.052, optim0_lr0=3.759e-05, train_time=7.089 +[gpub024:0/32] 2024-01-21 09:31:17,265 (trainer:737) INFO: 2epoch:train:7601-7700batch: iter_time=8.296e-05, forward_time=0.196, loss_ctc=219.073, loss_att=203.958, acc=0.218, loss=208.492, backward_time=0.236, grad_norm=71.932, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.055, optim0_lr0=3.775e-05, train_time=1.461 +[gpub024:0/32] 2024-01-21 09:33:05,428 (trainer:737) INFO: 2epoch:train:7701-7800batch: iter_time=8.374e-05, forward_time=0.304, loss_ctc=213.855, loss_att=198.193, acc=0.225, loss=202.891, backward_time=0.259, grad_norm=68.242, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.079, optim0_lr0=3.792e-05, train_time=1.081 +[gpub024:0/32] 2024-01-21 09:34:41,034 
(trainer:737) INFO: 2epoch:train:7801-7900batch: iter_time=8.207e-05, forward_time=0.162, loss_ctc=221.706, loss_att=216.057, acc=0.206, loss=217.751, backward_time=0.230, grad_norm=65.031, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.809e-05, train_time=0.956 +[gpub024:0/32] 2024-01-21 09:36:53,141 (trainer:737) INFO: 2epoch:train:7901-8000batch: iter_time=8.311e-05, forward_time=0.161, loss_ctc=239.975, loss_att=220.096, acc=0.200, loss=226.060, backward_time=0.227, grad_norm=85.575, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.825e-05, train_time=1.321 +[gpub024:0/32] 2024-01-21 09:39:51,136 (trainer:737) INFO: 2epoch:train:8001-8100batch: iter_time=8.436e-05, forward_time=0.161, loss_ctc=208.967, loss_att=198.476, acc=0.214, loss=201.623, backward_time=0.223, grad_norm=84.455, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.842e-05, train_time=1.780 +[gpub024:0/32] 2024-01-21 09:43:18,224 (trainer:737) INFO: 2epoch:train:8101-8200batch: iter_time=8.673e-05, forward_time=0.190, loss_ctc=200.780, loss_att=191.624, acc=0.217, loss=194.371, backward_time=0.252, grad_norm=67.507, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.055, optim0_lr0=3.859e-05, train_time=2.070 +[gpub024:0/32] 2024-01-21 09:45:27,691 (trainer:737) INFO: 2epoch:train:8201-8300batch: iter_time=8.910e-05, forward_time=0.211, loss_ctc=198.574, loss_att=195.617, acc=0.220, loss=196.504, backward_time=0.240, grad_norm=61.646, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.055, optim0_lr0=3.875e-05, train_time=1.295 +[gpub024:0/32] 2024-01-21 09:48:10,554 (trainer:737) INFO: 2epoch:train:8301-8400batch: iter_time=9.070e-05, forward_time=0.453, loss_ctc=221.790, loss_att=209.065, acc=0.218, loss=212.882, backward_time=0.274, grad_norm=68.268, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.064, optim0_lr0=3.892e-05, train_time=1.629 +[gpub024:0/32] 2024-01-21 09:49:41,631 (trainer:737) INFO: 
2epoch:train:8401-8500batch: iter_time=8.523e-05, forward_time=0.160, loss_ctc=187.897, loss_att=184.793, acc=0.223, loss=185.724, backward_time=0.227, grad_norm=66.366, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.909e-05, train_time=0.911 +[gpub024:0/32] 2024-01-21 09:52:20,281 (trainer:737) INFO: 2epoch:train:8501-8600batch: iter_time=8.635e-05, forward_time=0.162, loss_ctc=218.076, loss_att=215.271, acc=0.205, loss=216.113, backward_time=0.225, grad_norm=68.686, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.925e-05, train_time=1.586 +[gpub024:0/32] 2024-01-21 09:54:56,536 (trainer:737) INFO: 2epoch:train:8601-8700batch: iter_time=8.389e-05, forward_time=0.159, loss_ctc=169.898, loss_att=167.304, acc=0.227, loss=168.082, backward_time=0.223, grad_norm=51.693, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.942e-05, train_time=1.563 +[gpub024:0/32] 2024-01-21 09:56:31,531 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub024:0/32] 2024-01-21 09:56:50,296 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 09:56:54,253 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 09:56:54,253 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub024:0/32] 2024-01-21 09:56:54,256 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 10:06:24,976 (trainer:737) INFO: 2epoch:train:8701-8800batch: iter_time=5.535, forward_time=0.265, loss_ctc=204.846, loss_att=201.447, acc=0.227, loss=202.467, backward_time=0.239, grad_norm=77.903, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.055, optim0_lr0=3.959e-05, train_time=6.884 +[gpub024:0/32] 2024-01-21 10:08:03,109 (trainer:737) INFO: 2epoch:train:8801-8900batch: iter_time=8.002e-05, forward_time=0.163, loss_ctc=242.449, loss_att=246.009, acc=0.201, loss=244.941, backward_time=0.228, grad_norm=89.561, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.975e-05, train_time=0.982 +[gpub024:0/32] 2024-01-21 10:10:38,522 (trainer:737) INFO: 2epoch:train:8901-9000batch: iter_time=8.131e-05, forward_time=0.161, loss_ctc=201.129, loss_att=192.800, acc=0.227, loss=195.299, backward_time=0.226, grad_norm=77.445, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.051, optim0_lr0=3.992e-05, train_time=1.554 +[gpub024:0/32] 2024-01-21 10:13:00,254 (trainer:737) 
INFO: 2epoch:train:9001-9100batch: iter_time=8.223e-05, forward_time=0.161, loss_ctc=187.176, loss_att=192.050, acc=0.226, loss=190.588, backward_time=0.226, grad_norm=58.931, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.009e-05, train_time=1.417 +[gpub024:0/32] 2024-01-21 10:15:33,656 (trainer:737) INFO: 2epoch:train:9101-9200batch: iter_time=8.065e-05, forward_time=0.163, loss_ctc=232.664, loss_att=237.841, acc=0.196, loss=236.288, backward_time=0.227, grad_norm=75.046, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.025e-05, train_time=1.534 +[gpub024:0/32] 2024-01-21 10:18:06,971 (trainer:737) INFO: 2epoch:train:9201-9300batch: iter_time=8.247e-05, forward_time=0.162, loss_ctc=220.542, loss_att=221.388, acc=0.206, loss=221.135, backward_time=0.226, grad_norm=85.394, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.042e-05, train_time=1.533 +[gpub024:0/32] 2024-01-21 10:21:37,086 (trainer:737) INFO: 2epoch:train:9301-9400batch: iter_time=8.920e-05, forward_time=0.345, loss_ctc=163.426, loss_att=164.934, acc=0.235, loss=164.482, backward_time=0.412, grad_norm=58.719, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.086, optim0_lr0=4.059e-05, train_time=2.101 +[gpub024:0/32] 2024-01-21 10:24:48,473 (trainer:737) INFO: 2epoch:train:9401-9500batch: iter_time=9.323e-05, forward_time=0.162, loss_ctc=211.375, loss_att=217.735, acc=0.212, loss=215.827, backward_time=0.226, grad_norm=64.868, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.075e-05, train_time=1.914 +[gpub024:0/32] 2024-01-21 10:27:07,156 (trainer:737) INFO: 2epoch:train:9501-9600batch: iter_time=9.232e-05, forward_time=0.162, loss_ctc=191.529, loss_att=203.279, acc=0.220, loss=199.754, backward_time=0.226, grad_norm=55.924, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.092e-05, train_time=1.387 +[gpub024:0/32] 2024-01-21 10:28:36,263 (trainer:737) INFO: 
2epoch:train:9601-9700batch: iter_time=8.804e-05, forward_time=0.161, loss_ctc=196.695, loss_att=200.832, acc=0.221, loss=199.591, backward_time=0.227, grad_norm=68.243, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.109e-05, train_time=0.891 +[gpub024:0/32] 2024-01-21 10:31:30,236 (trainer:737) INFO: 2epoch:train:9701-9800batch: iter_time=8.059e-05, forward_time=0.160, loss_ctc=180.509, loss_att=184.686, acc=0.221, loss=183.433, backward_time=0.224, grad_norm=62.165, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.125e-05, train_time=1.740 +[gpub024:0/32] 2024-01-21 10:33:54,061 (trainer:737) INFO: 2epoch:train:9801-9900batch: iter_time=8.463e-05, forward_time=0.161, loss_ctc=189.776, loss_att=198.824, acc=0.220, loss=196.110, backward_time=0.224, grad_norm=66.974, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.142e-05, train_time=1.438 +[gpub024:0/32] 2024-01-21 10:35:41,480 (trainer:737) INFO: 2epoch:train:9901-10000batch: iter_time=8.250e-05, forward_time=0.208, loss_ctc=177.553, loss_att=185.585, acc=0.229, loss=183.175, backward_time=0.270, grad_norm=66.750, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.069, optim0_lr0=4.159e-05, train_time=1.074 +[gpub024:0/32] 2024-01-21 10:36:01,508 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub024:0/32] 2024-01-21 10:36:20,612 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 10:36:24,315 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 10:36:24,315 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub024:0/32] 2024-01-21 10:36:24,318 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 10:46:19,711 (trainer:737) INFO: 2epoch:train:10001-10100batch: iter_time=5.291, forward_time=0.212, loss_ctc=211.537, loss_att=225.597, acc=0.213, loss=221.379, backward_time=0.238, grad_norm=76.161, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.053, optim0_lr0=4.175e-05, train_time=6.382 +[gpub024:0/32] 2024-01-21 10:48:01,575 (trainer:737) INFO: 2epoch:train:10101-10200batch: iter_time=8.986e-05, forward_time=0.161, loss_ctc=197.284, loss_att=203.088, acc=0.223, loss=201.347, backward_time=0.227, grad_norm=71.489, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.192e-05, train_time=1.019 +[gpub024:0/32] 2024-01-21 10:51:31,483 (trainer:737) INFO: 2epoch:train:10201-10300batch: iter_time=9.015e-05, forward_time=0.180, loss_ctc=191.913, loss_att=194.213, acc=0.233, loss=193.523, backward_time=0.275, grad_norm=62.049, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.059, optim0_lr0=4.209e-05, train_time=2.099 +[gpub024:0/32] 2024-01-21 10:54:13,703 
(trainer:737) INFO: 2epoch:train:10301-10400batch: iter_time=8.781e-05, forward_time=0.163, loss_ctc=195.989, loss_att=217.526, acc=0.211, loss=211.065, backward_time=0.227, grad_norm=59.191, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.225e-05, train_time=1.622 +[gpub024:0/32] 2024-01-21 10:56:03,623 (trainer:737) INFO: 2epoch:train:10401-10500batch: iter_time=9.124e-05, forward_time=0.162, loss_ctc=213.058, loss_att=221.674, acc=0.203, loss=219.089, backward_time=0.228, grad_norm=75.719, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.242e-05, train_time=1.099 +[gpub024:0/32] 2024-01-21 10:58:44,097 (trainer:737) INFO: 2epoch:train:10501-10600batch: iter_time=2.653e-04, forward_time=0.360, loss_ctc=186.351, loss_att=197.085, acc=0.219, loss=193.865, backward_time=0.295, grad_norm=69.272, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.064, optim0_lr0=4.259e-05, train_time=1.605 +[gpub024:0/32] 2024-01-21 11:00:14,028 (trainer:737) INFO: 2epoch:train:10601-10700batch: iter_time=1.024e-04, forward_time=0.162, loss_ctc=179.379, loss_att=190.717, acc=0.223, loss=187.316, backward_time=0.228, grad_norm=62.268, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.275e-05, train_time=0.899 +[gpub024:0/32] 2024-01-21 11:02:04,285 (trainer:737) INFO: 2epoch:train:10701-10800batch: iter_time=8.979e-05, forward_time=0.237, loss_ctc=177.860, loss_att=194.007, acc=0.226, loss=189.163, backward_time=0.232, grad_norm=54.215, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.052, optim0_lr0=4.292e-05, train_time=1.102 +[gpub024:0/32] 2024-01-21 11:05:05,767 (trainer:737) INFO: 2epoch:train:10801-10900batch: iter_time=9.648e-05, forward_time=0.162, loss_ctc=196.944, loss_att=209.120, acc=0.222, loss=205.467, backward_time=0.226, grad_norm=60.768, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.309e-05, train_time=1.815 +[gpub024:0/32] 2024-01-21 11:07:20,467 (trainer:737) INFO: 
2epoch:train:10901-11000batch: iter_time=9.434e-05, forward_time=0.161, loss_ctc=167.046, loss_att=183.033, acc=0.227, loss=178.237, backward_time=0.225, grad_norm=58.283, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.051, optim0_lr0=4.325e-05, train_time=1.347 +[gpub024:0/32] 2024-01-21 11:09:34,137 (trainer:737) INFO: 2epoch:train:11001-11100batch: iter_time=8.874e-05, forward_time=0.162, loss_ctc=194.706, loss_att=217.799, acc=0.211, loss=210.871, backward_time=0.228, grad_norm=62.817, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.342e-05, train_time=1.336 +[gpub024:0/32] 2024-01-21 11:11:29,609 (trainer:737) INFO: 2epoch:train:11101-11200batch: iter_time=8.806e-05, forward_time=0.192, loss_ctc=152.200, loss_att=164.902, acc=0.235, loss=161.092, backward_time=0.297, grad_norm=51.139, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.058, optim0_lr0=4.359e-05, train_time=1.155 +[gpub024:0/32] 2024-01-21 11:12:52,230 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub024:0/32] 2024-01-21 11:13:11,431 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 11:13:15,040 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 11:13:15,040 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub024:0/32] 2024-01-21 11:13:15,044 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 11:22:24,187 (trainer:737) INFO: 2epoch:train:11201-11300batch: iter_time=5.274, forward_time=0.316, loss_ctc=182.443, loss_att=195.045, acc=0.234, loss=191.265, backward_time=0.260, grad_norm=67.292, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.060, optim0_lr0=4.375e-05, train_time=6.546 +[gpub024:0/32] 2024-01-21 11:23:48,392 (trainer:737) INFO: 2epoch:train:11301-11400batch: iter_time=8.878e-05, forward_time=0.163, loss_ctc=216.806, loss_att=235.576, acc=0.208, loss=229.945, backward_time=0.228, grad_norm=82.055, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.392e-05, train_time=0.842 +[gpub024:0/32] 2024-01-21 11:26:12,208 (trainer:737) INFO: 2epoch:train:11401-11500batch: iter_time=8.791e-05, forward_time=0.161, loss_ctc=183.590, loss_att=187.158, acc=0.234, loss=186.088, backward_time=0.226, grad_norm=69.442, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.409e-05, train_time=1.438 +[gpub024:0/32] 2024-01-21 11:29:12,492 
(trainer:737) INFO: 2epoch:train:11501-11600batch: iter_time=8.466e-05, forward_time=0.164, loss_ctc=166.442, loss_att=186.899, acc=0.233, loss=180.762, backward_time=0.225, grad_norm=51.578, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.425e-05, train_time=1.803 +[gpub024:0/32] 2024-01-21 11:31:25,695 (trainer:737) INFO: 2epoch:train:11601-11700batch: iter_time=8.335e-05, forward_time=0.162, loss_ctc=208.882, loss_att=228.034, acc=0.200, loss=222.289, backward_time=0.228, grad_norm=65.881, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.442e-05, train_time=1.332 +[gpub024:0/32] 2024-01-21 11:33:42,231 (trainer:737) INFO: 2epoch:train:11701-11800batch: iter_time=8.475e-05, forward_time=0.372, loss_ctc=195.765, loss_att=213.355, acc=0.212, loss=208.078, backward_time=0.265, grad_norm=73.868, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.078, optim0_lr0=4.459e-05, train_time=1.365 +[gpub024:0/32] 2024-01-21 11:36:20,515 (trainer:737) INFO: 2epoch:train:11801-11900batch: iter_time=8.881e-05, forward_time=0.176, loss_ctc=147.160, loss_att=160.891, acc=0.240, loss=156.772, backward_time=0.281, grad_norm=58.215, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.071, optim0_lr0=4.475e-05, train_time=1.583 +[gpub024:0/32] 2024-01-21 11:39:02,234 (trainer:737) INFO: 2epoch:train:11901-12000batch: iter_time=9.437e-05, forward_time=0.162, loss_ctc=190.632, loss_att=210.824, acc=0.219, loss=204.766, backward_time=0.226, grad_norm=64.620, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.492e-05, train_time=1.617 +[gpub024:0/32] 2024-01-21 11:42:01,528 (trainer:737) INFO: 2epoch:train:12001-12100batch: iter_time=9.269e-05, forward_time=0.161, loss_ctc=171.226, loss_att=195.649, acc=0.227, loss=188.322, backward_time=0.226, grad_norm=50.466, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.509e-05, train_time=1.792 +[gpub024:0/32] 2024-01-21 11:43:57,004 (trainer:737) INFO: 
2epoch:train:12101-12200batch: iter_time=8.575e-05, forward_time=0.161, loss_ctc=175.791, loss_att=194.657, acc=0.229, loss=188.997, backward_time=0.225, grad_norm=58.546, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.525e-05, train_time=1.155 +[gpub024:0/32] 2024-01-21 11:45:41,952 (trainer:737) INFO: 2epoch:train:12201-12300batch: iter_time=8.361e-05, forward_time=0.160, loss_ctc=162.117, loss_att=177.448, acc=0.228, loss=172.849, backward_time=0.224, grad_norm=56.202, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.542e-05, train_time=1.049 +[gpub024:0/32] 2024-01-21 11:47:53,370 (trainer:737) INFO: 2epoch:train:12301-12400batch: iter_time=8.567e-05, forward_time=0.315, loss_ctc=170.480, loss_att=191.648, acc=0.224, loss=185.297, backward_time=0.298, grad_norm=56.002, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.078, optim0_lr0=4.559e-05, train_time=1.314 +[gpub024:0/32] 2024-01-21 11:50:21,709 (trainer:737) INFO: 2epoch:train:12401-12500batch: iter_time=8.174e-05, forward_time=0.227, loss_ctc=158.129, loss_att=179.541, acc=0.236, loss=173.117, backward_time=0.250, grad_norm=56.338, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.058, optim0_lr0=4.575e-05, train_time=1.483 +[gpub024:0/32] 2024-01-21 11:50:41,737 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub024:0/32] 2024-01-21 11:51:01,089 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 11:51:04,991 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 11:51:04,991 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub024:0/32] 2024-01-21 11:51:04,994 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 12:03:54,844 (trainer:737) INFO: 2epoch:train:12501-12600batch: iter_time=6.743, forward_time=0.166, loss_ctc=189.494, loss_att=214.330, acc=0.221, loss=206.879, backward_time=0.228, grad_norm=67.290, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.592e-05, train_time=8.131 +[gpub024:0/32] 2024-01-21 12:05:15,765 (trainer:737) INFO: 2epoch:train:12601-12700batch: iter_time=8.742e-05, forward_time=0.161, loss_ctc=176.820, loss_att=195.331, acc=0.231, loss=189.777, backward_time=0.227, grad_norm=62.074, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.609e-05, train_time=0.809 +[gpub024:0/32] 2024-01-21 12:06:59,941 (trainer:737) INFO: 2epoch:train:12701-12800batch: iter_time=8.314e-05, forward_time=0.162, loss_ctc=173.997, loss_att=190.113, acc=0.237, loss=185.278, backward_time=0.228, grad_norm=57.607, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.051, optim0_lr0=4.625e-05, train_time=1.042 +[gpub024:0/32] 2024-01-21 12:10:07,465 
(trainer:737) INFO: 2epoch:train:12801-12900batch: iter_time=8.637e-05, forward_time=0.323, loss_ctc=175.678, loss_att=206.467, acc=0.219, loss=197.230, backward_time=0.377, grad_norm=55.567, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.078, optim0_lr0=4.642e-05, train_time=1.875 +[gpub024:0/32] 2024-01-21 12:12:26,216 (trainer:737) INFO: 2epoch:train:12901-13000batch: iter_time=8.736e-05, forward_time=0.225, loss_ctc=193.775, loss_att=214.246, acc=0.212, loss=208.105, backward_time=0.236, grad_norm=72.197, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.075, optim0_lr0=4.659e-05, train_time=1.387 +[gpub024:0/32] 2024-01-21 12:15:47,450 (trainer:737) INFO: 2epoch:train:13001-13100batch: iter_time=9.301e-05, forward_time=0.161, loss_ctc=167.376, loss_att=189.572, acc=0.227, loss=182.913, backward_time=0.224, grad_norm=67.216, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.675e-05, train_time=2.012 +[gpub024:0/32] 2024-01-21 12:19:23,314 (trainer:737) INFO: 2epoch:train:13101-13200batch: iter_time=8.833e-05, forward_time=0.161, loss_ctc=160.247, loss_att=184.753, acc=0.231, loss=177.401, backward_time=0.223, grad_norm=57.673, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.692e-05, train_time=2.158 +[gpub024:0/32] 2024-01-21 12:21:00,301 (trainer:737) INFO: 2epoch:train:13201-13300batch: iter_time=8.285e-05, forward_time=0.164, loss_ctc=160.907, loss_att=189.457, acc=0.232, loss=180.892, backward_time=0.227, grad_norm=49.905, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.709e-05, train_time=0.970 +[gpub024:0/32] 2024-01-21 12:22:49,558 (trainer:737) INFO: 2epoch:train:13301-13400batch: iter_time=8.492e-05, forward_time=0.161, loss_ctc=176.327, loss_att=201.275, acc=0.229, loss=193.790, backward_time=0.227, grad_norm=55.806, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.725e-05, train_time=1.092 +[gpub024:0/32] 2024-01-21 12:25:07,496 (trainer:737) INFO: 
2epoch:train:13401-13500batch: iter_time=8.740e-05, forward_time=0.421, loss_ctc=150.789, loss_att=178.223, acc=0.233, loss=169.993, backward_time=0.312, grad_norm=52.181, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.064, optim0_lr0=4.742e-05, train_time=1.379 +[gpub024:0/32] 2024-01-21 12:26:49,183 (trainer:737) INFO: 2epoch:train:13501-13600batch: iter_time=8.187e-05, forward_time=0.228, loss_ctc=175.408, loss_att=207.893, acc=0.216, loss=198.147, backward_time=0.232, grad_norm=59.868, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.058, optim0_lr0=4.759e-05, train_time=1.017 +[gpub024:0/32] 2024-01-21 12:29:05,911 (trainer:737) INFO: 2epoch:train:13601-13700batch: iter_time=8.967e-05, forward_time=0.159, loss_ctc=137.150, loss_att=160.655, acc=0.239, loss=153.604, backward_time=0.224, grad_norm=48.053, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.775e-05, train_time=1.367 +[gpub024:0/32] 2024-01-21 12:30:37,269 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub024:0/32] 2024-01-21 12:30:56,249 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 12:30:59,817 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 12:30:59,817 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub024:0/32] 2024-01-21 12:30:59,821 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 12:42:01,547 (trainer:737) INFO: 2epoch:train:13701-13800batch: iter_time=6.356, forward_time=0.164, loss_ctc=163.751, loss_att=193.859, acc=0.241, loss=184.827, backward_time=0.225, grad_norm=63.970, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.792e-05, train_time=7.755 +[gpub024:0/32] 2024-01-21 12:43:22,498 (trainer:737) INFO: 2epoch:train:13801-13900batch: iter_time=8.267e-05, forward_time=0.162, loss_ctc=192.258, loss_att=237.489, acc=0.213, loss=223.920, backward_time=0.229, grad_norm=64.545, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.809e-05, train_time=0.810 +[gpub024:0/32] 2024-01-21 12:44:55,692 (trainer:737) INFO: 2epoch:train:13901-14000batch: iter_time=8.501e-05, forward_time=0.209, loss_ctc=166.963, loss_att=185.217, acc=0.240, loss=179.741, backward_time=0.257, grad_norm=65.861, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.066, optim0_lr0=4.825e-05, train_time=0.932 +[gpub024:0/32] 2024-01-21 12:47:12,062 
(trainer:737) INFO: 2epoch:train:14001-14100batch: iter_time=8.791e-05, forward_time=0.430, loss_ctc=147.813, loss_att=184.558, acc=0.239, loss=173.535, backward_time=0.277, grad_norm=47.799, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.074, optim0_lr0=4.842e-05, train_time=1.363 +[gpub024:0/32] 2024-01-21 12:51:09,162 (trainer:737) INFO: 2epoch:train:14101-14200batch: iter_time=8.721e-05, forward_time=0.172, loss_ctc=188.929, loss_att=230.608, acc=0.206, loss=218.104, backward_time=0.238, grad_norm=61.021, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.060, optim0_lr0=4.859e-05, train_time=2.372 +[gpub024:0/32] 2024-01-21 12:53:27,429 (trainer:737) INFO: 2epoch:train:14201-14300batch: iter_time=9.045e-05, forward_time=0.162, loss_ctc=176.423, loss_att=214.171, acc=0.219, loss=202.846, backward_time=0.226, grad_norm=62.499, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.875e-05, train_time=1.382 +[gpub024:0/32] 2024-01-21 12:56:43,908 (trainer:737) INFO: 2epoch:train:14301-14400batch: iter_time=8.738e-05, forward_time=0.161, loss_ctc=131.590, loss_att=157.790, acc=0.248, loss=149.930, backward_time=0.221, grad_norm=49.061, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.892e-05, train_time=1.964 +[gpub024:0/32] 2024-01-21 12:59:03,330 (trainer:737) INFO: 2epoch:train:14401-14500batch: iter_time=8.467e-05, forward_time=0.162, loss_ctc=172.757, loss_att=211.088, acc=0.225, loss=199.588, backward_time=0.228, grad_norm=60.452, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.909e-05, train_time=1.395 +[gpub024:0/32] 2024-01-21 13:00:57,080 (trainer:737) INFO: 2epoch:train:14501-14600batch: iter_time=8.548e-05, forward_time=0.348, loss_ctc=153.311, loss_att=196.703, acc=0.232, loss=183.685, backward_time=0.280, grad_norm=48.713, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.925e-05, train_time=1.137 +[gpub024:0/32] 2024-01-21 13:03:52,273 (trainer:737) INFO: 
2epoch:train:14601-14700batch: iter_time=8.819e-05, forward_time=0.232, loss_ctc=158.150, loss_att=192.988, acc=0.234, loss=182.537, backward_time=0.235, grad_norm=56.852, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.070, optim0_lr0=4.942e-05, train_time=1.751 +[gpub024:0/32] 2024-01-21 13:06:57,468 (trainer:737) INFO: 2epoch:train:14701-14800batch: iter_time=8.096e-05, forward_time=0.162, loss_ctc=145.524, loss_att=177.890, acc=0.234, loss=168.180, backward_time=0.223, grad_norm=50.138, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.959e-05, train_time=1.852 +[gpub024:0/32] 2024-01-21 13:08:39,278 (trainer:737) INFO: 2epoch:train:14801-14900batch: iter_time=8.262e-05, forward_time=0.161, loss_ctc=153.451, loss_att=190.574, acc=0.232, loss=179.437, backward_time=0.227, grad_norm=52.955, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.975e-05, train_time=1.018 +[gpub024:0/32] 2024-01-21 13:11:53,994 (trainer:737) INFO: 2epoch:train:14901-15000batch: iter_time=8.009e-05, forward_time=0.160, loss_ctc=142.911, loss_att=178.184, acc=0.242, loss=167.602, backward_time=0.223, grad_norm=54.443, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.051, optim0_lr0=4.992e-05, train_time=1.947 +[gpub024:0/32] 2024-01-21 13:48:22,540 (trainer:343) INFO: 2epoch results: [train] iter_time=0.482, forward_time=0.204, loss_ctc=211.941, loss_att=206.146, acc=0.213, loss=207.885, backward_time=0.242, grad_norm=78.034, clip=100.000, loss_scale=2.847e+08, optim_step_time=0.056, optim0_lr0=3.750e-05, train_time=1.840, time=7 hours, 40 minutes and 26.73 seconds, total_count=30000, gpu_max_cached_mem_GB=25.521, [valid] loss_ctc=147.384, cer_ctc=0.733, loss_att=149.420, acc=0.185, cer=0.708, wer=1.000, loss=148.809, time=36 minutes and 3.37 seconds, total_count=9342, gpu_max_cached_mem_GB=25.521 +[gpub024:0/32] 2024-01-21 13:48:35,183 (trainer:391) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub024:0/32] 
2024-01-21 13:48:35,184 (trainer:272) INFO: 3/45epoch started. Estimated time to finish: 2 weeks, 3 days and 4 hours +[gpub024:0/32] 2024-01-21 13:48:35,195 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub024:0/32] 2024-01-21 13:48:53,028 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 13:48:56,417 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 13:48:56,417 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub024:0/32] 2024-01-21 13:48:56,420 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 14:00:42,551 (trainer:737) INFO: 3epoch:train:1-100batch: iter_time=6.334, forward_time=0.236, loss_ctc=164.879, loss_att=197.839, acc=0.227, loss=187.951, backward_time=0.261, grad_norm=67.852, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.059, optim0_lr0=5.077e-05, train_time=7.273 +[gpub024:0/32] 2024-01-21 14:02:03,118 (trainer:737) INFO: 3epoch:train:101-200batch: iter_time=7.964e-05, forward_time=0.161, loss_ctc=150.342, loss_att=196.520, acc=0.231, loss=182.667, backward_time=0.235, grad_norm=56.883, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=5.227e-05, train_time=0.805 +[gpub024:0/32] 2024-01-21 14:03:39,420 (trainer:737) INFO: 3epoch:train:201-300batch: iter_time=8.633e-05, forward_time=0.162, loss_ctc=156.844, loss_att=200.873, 
acc=0.225, loss=187.664, backward_time=0.228, grad_norm=61.095, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=5.377e-05, train_time=0.963 +[gpub024:0/32] 2024-01-21 14:05:44,638 (trainer:737) INFO: 3epoch:train:301-400batch: iter_time=8.713e-05, forward_time=0.161, loss_ctc=160.456, loss_att=204.208, acc=0.226, loss=191.083, backward_time=0.228, grad_norm=53.251, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=5.527e-05, train_time=1.252 +[gpub024:0/32] 2024-01-21 14:08:09,494 (trainer:737) INFO: 3epoch:train:401-500batch: iter_time=8.428e-05, forward_time=0.161, loss_ctc=167.574, loss_att=203.804, acc=0.228, loss=192.935, backward_time=0.226, grad_norm=70.335, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=5.677e-05, train_time=1.447 +[gpub024:0/32] 2024-01-21 14:10:54,410 (trainer:737) INFO: 3epoch:train:501-600batch: iter_time=8.870e-05, forward_time=0.161, loss_ctc=151.030, loss_att=193.038, acc=0.226, loss=180.436, backward_time=0.225, grad_norm=62.493, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=5.827e-05, train_time=1.650 +[gpub024:0/32] 2024-01-21 14:13:24,214 (trainer:737) INFO: 3epoch:train:601-700batch: iter_time=9.219e-05, forward_time=0.269, loss_ctc=151.594, loss_att=194.530, acc=0.232, loss=181.649, backward_time=0.261, grad_norm=52.749, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.066, optim0_lr0=5.977e-05, train_time=1.498 +[gpub024:0/32] 2024-01-21 14:16:36,449 (trainer:737) INFO: 3epoch:train:701-800batch: iter_time=4.938e-04, forward_time=0.416, loss_ctc=186.657, loss_att=215.152, acc=0.221, loss=206.604, backward_time=0.306, grad_norm=80.094, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.097, optim0_lr0=6.127e-05, train_time=1.922 +[gpub024:0/32] 2024-01-21 14:18:59,718 (trainer:737) INFO: 3epoch:train:801-900batch: iter_time=9.184e-05, forward_time=0.161, loss_ctc=156.436, loss_att=210.908, acc=0.224, loss=194.567, 
backward_time=0.224, grad_norm=56.058, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=6.277e-05, train_time=1.432 +[gpub024:0/32] 2024-01-21 14:21:10,574 (trainer:737) INFO: 3epoch:train:901-1000batch: iter_time=8.679e-05, forward_time=0.162, loss_ctc=153.718, loss_att=209.820, acc=0.226, loss=192.989, backward_time=0.226, grad_norm=59.704, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=6.427e-05, train_time=1.308 +[gpub024:0/32] 2024-01-21 14:23:17,185 (trainer:737) INFO: 3epoch:train:1001-1100batch: iter_time=8.682e-05, forward_time=0.161, loss_ctc=149.274, loss_att=177.560, acc=0.239, loss=169.074, backward_time=0.225, grad_norm=66.689, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=6.577e-05, train_time=1.266 +[gpub024:0/32] 2024-01-21 14:25:56,105 (trainer:737) INFO: 3epoch:train:1101-1200batch: iter_time=8.427e-05, forward_time=0.161, loss_ctc=161.349, loss_att=207.731, acc=0.224, loss=193.816, backward_time=0.226, grad_norm=57.282, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=6.727e-05, train_time=1.589 +[gpub024:0/32] 2024-01-21 14:27:55,600 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub024:0/32] 2024-01-21 14:28:14,801 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub024:0/32] 2024-01-21 14:28:18,408 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub024:0/32] 2024-01-21 14:28:18,408 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub024:0/32] 2024-01-21 14:28:18,411 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub024:0/32] 2024-01-21 14:39:54,078 (trainer:737) INFO: 3epoch:train:1201-1300batch: iter_time=6.851, forward_time=0.230, loss_ctc=143.889, loss_att=177.295, acc=0.245, loss=167.273, backward_time=0.242, grad_norm=61.605, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.054, optim0_lr0=6.877e-05, train_time=8.380 +[gpub024:0/32] 2024-01-21 14:41:35,222 (trainer:737) INFO: 3epoch:train:1301-1400batch: iter_time=8.133e-05, forward_time=0.163, loss_ctc=168.320, loss_att=209.472, acc=0.220, loss=197.126, backward_time=0.228, grad_norm=69.991, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=7.027e-05, train_time=1.011 +[gpub024:0/32] 2024-01-21 14:43:38,843 (trainer:737) INFO: 3epoch:train:1401-1500batch: iter_time=8.230e-05, forward_time=0.161, loss_ctc=152.600, loss_att=207.012, acc=0.231, loss=190.689, backward_time=0.224, grad_norm=59.556, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=7.177e-05, train_time=1.236 +[gpub024:0/32] 2024-01-21 14:45:16,055 (trainer:737) 
INFO: 3epoch:train:1501-1600batch: iter_time=8.089e-05, forward_time=0.160, loss_ctc=139.486, loss_att=175.690, acc=0.230, loss=164.829, backward_time=0.226, grad_norm=57.351, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=7.327e-05, train_time=0.972 +[gpub024:0/32] 2024-01-21 14:47:49,377 (trainer:737) INFO: 3epoch:train:1601-1700batch: iter_time=7.928e-05, forward_time=0.164, loss_ctc=172.898, loss_att=231.329, acc=0.226, loss=213.800, backward_time=0.236, grad_norm=68.368, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=7.477e-05, train_time=1.533 +[gpub024:0/32] 2024-01-21 14:50:05,665 (trainer:737) INFO: 3epoch:train:1701-1800batch: iter_time=8.141e-05, forward_time=0.161, loss_ctc=145.924, loss_att=187.528, acc=0.227, loss=175.047, backward_time=0.225, grad_norm=58.942, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.052, optim0_lr0=7.627e-05, train_time=1.363 +[gpub024:0/32] 2024-01-21 14:53:09,718 (trainer:737) INFO: 3epoch:train:1801-1900batch: iter_time=8.646e-05, forward_time=0.160, loss_ctc=142.683, loss_att=184.343, acc=0.235, loss=171.845, backward_time=0.224, grad_norm=58.001, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.051, optim0_lr0=7.777e-05, train_time=1.840 +[gpub024:0/32] 2024-01-21 14:56:10,488 (trainer:737) INFO: 3epoch:train:1901-2000batch: iter_time=8.438e-05, forward_time=0.342, loss_ctc=160.193, loss_att=203.916, acc=0.224, loss=190.799, backward_time=0.312, grad_norm=57.292, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.070, optim0_lr0=7.927e-05, train_time=1.807 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. 
+slurmstepd: error: *** STEP 2877332.0 ON gpub024 CANCELLED AT 2024-01-21T14:57:39 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.2.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.2.log new file mode 100644 index 0000000000000000000000000000000000000000..afe900b3880166436bbb55ee6874198d86be7688 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.2.log @@ -0,0 +1,2649 @@ +# Running on gpub011.delta.ncsa.illinois.edu +# Started at Mon Feb 5 19:20:14 CST 2024 +# SLURMD_NODENAME=gpub011 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2944938 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1707355192 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2944938 +# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[011,042,044,098]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1707182392 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[011,042,044,098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=2351959 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub011 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# 
SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_eb219d8a-88e5-437d-9b13-11b6fb1761dd +/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_sh/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config 
conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_sh/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_sh/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_eb219d8a-88e5-437d-9b13-11b6fb1761dd +ape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_eb219d8a-88e5-437d-9b13-11b6fb1761dd +ape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe 
--valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_eb219d8a-88e5-437d-9b13-11b6fb1761dd +ape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_eb219d8a-88e5-437d-9b13-11b6fb1761dd +[gpub011:0/16] 2024-02-05 19:25:40,541 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub011:0/16] 2024-02-05 19:25:40,561 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for 
key:store_based_barrier_key:1 with 16 nodes. +[gpub011:0/16] 2024-02-05 19:25:40,728 (s2t:464) INFO: Vocabulary size: 50002 +[gpub011:0/16] 2024-02-05 19:25:53,456 (abs_task:1271) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub011:0/16] 2024-02-05 19:25:53,462 (abs_task:1272) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): 
ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + 
(linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, 
out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): 
Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): 
Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub011:0/16] 2024-02-05 19:25:53,462 (abs_task:1275) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub011:0/16] 2024-02-05 19:25:53,462 (abs_task:1276) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], 
warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub011:0/16] 2024-02-05 19:25:53,464 (abs_task:1285) INFO: Saving the configuration in exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub011:0/16] 2024-02-05 19:25:59,255 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 19:26:00,202 (abs_task:1663) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 19:26:00,202 (abs_task:1664) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub011:0/16] 2024-02-05 19:26:00,256 (abs_task:1665) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 19:26:19,189 (trainer:168) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub011:2352043:2352043 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2352043:2352043 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2352043:2352043 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2352043:2352043 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub011:0/16] 2024-02-05 19:26:27,335 (trainer:302) INFO: 29/45epoch started +[gpub011:0/16] 2024-02-05 19:26:27,376 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub011:0/16] 2024-02-05 19:26:45,423 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 19:26:48,936 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 19:26:48,936 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-05 19:26:48,939 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub044:436186:436186 [1] NCCL INFO cudaDriverVersion 12020 +gpub044:436186:436186 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.144<0> +gpub044:436186:436186 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub044:436186:436186 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub044:436186:436249 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub044:436186:436249 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub044:436186:436249 [1] NCCL INFO Using network AWS Libfabric +gpub044:436186:436249 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub044:436186:436249 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub044:436186:436249 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub044:436186:436249 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub044:436186:436249 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub044:436186:436249 [1] NCCL INFO Connected all rings +gpub044:436186:436249 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/AWS Libfabric/1 +gpub044:436186:436249 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub044:436186:436249 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub044:436186:436249 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub044:436186:436249 [1] NCCL INFO Connected all trees +gpub044:436186:436249 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub044:436186:436249 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub044:436186:436249 [1] NCCL INFO comm 0x55d99450d260 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub044:436187:436187 [2] NCCL INFO cudaDriverVersion 12020 +gpub044:436187:436187 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.144<0> +gpub044:436187:436187 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub044:436187:436187 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub044:436187:436251 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub044:436187:436251 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub044:436187:436251 [2] NCCL INFO Using network AWS Libfabric +gpub044:436187:436251 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub044:436187:436251 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub044:436187:436251 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub044:436187:436251 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub044:436187:436251 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub044:436187:436251 [2] NCCL INFO Connected all rings +gpub044:436187:436251 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub044:436187:436251 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub044:436187:436251 [2] NCCL INFO Connected all trees +gpub044:436187:436251 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub044:436187:436251 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub044:436187:436251 [2] NCCL INFO comm 0x559867b7eef0 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub044:436185:436185 [0] NCCL INFO cudaDriverVersion 12020 +gpub044:436185:436185 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.144<0> +gpub044:436185:436185 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub044:436185:436185 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub044:436185:436250 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub044:436185:436250 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub044:436185:436250 [0] NCCL INFO Using network AWS Libfabric +gpub044:436185:436250 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub044:436185:436250 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub044:436185:436250 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub044:436185:436250 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub044:436185:436250 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub044:436185:436250 [0] NCCL INFO Connected all rings +gpub044:436185:436250 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/AWS Libfabric/1 +gpub044:436185:436250 [0] NCCL INFO Connected all trees +gpub044:436185:436250 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub044:436185:436250 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub044:436185:436250 [0] NCCL INFO comm 0x55837b1d3b60 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub044:436188:436188 [3] NCCL INFO cudaDriverVersion 12020 +gpub044:436188:436188 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.144<0> +gpub044:436188:436188 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub044:436188:436188 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub044:436188:436252 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub044:436188:436252 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub044:436188:436252 [3] NCCL INFO Using network AWS Libfabric +gpub044:436188:436252 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub044:436188:436252 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub044:436188:436252 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub044:436188:436252 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub044:436188:436252 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub044:436188:436252 [3] NCCL INFO Connected all rings +gpub044:436188:436252 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub044:436188:436252 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub044:436188:436252 [3] NCCL INFO Connected all trees +gpub044:436188:436252 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub044:436188:436252 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub044:436188:436252 [3] NCCL INFO comm 0x55afd4f9c990 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub011:2352046:2352046 [3] NCCL INFO cudaDriverVersion 12020 +gpub011:2352046:2352046 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2352046:2352046 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2352046:2352046 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2352046:2352106 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2352046:2352106 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2352046:2352106 [3] NCCL INFO Using network AWS Libfabric +gpub011:2352046:2352106 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub011:2352046:2352106 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub011:2352046:2352106 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub011:2352046:2352106 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub011:2352046:2352106 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub011:2352046:2352106 [3] NCCL INFO Connected all rings +gpub011:2352046:2352106 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub011:2352046:2352106 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub011:2352046:2352106 [3] NCCL INFO Connected all trees +gpub011:2352046:2352106 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2352046:2352106 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2352046:2352106 [3] NCCL INFO comm 0x56380d66ed60 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub011:2352045:2352045 [2] NCCL INFO cudaDriverVersion 12020 +gpub011:2352045:2352045 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2352045:2352045 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2352045:2352045 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2352045:2352105 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2352045:2352105 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2352045:2352105 [2] NCCL INFO Using network AWS Libfabric +gpub011:2352045:2352105 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:2352045:2352105 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub011:2352045:2352105 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub011:2352045:2352105 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub011:2352045:2352105 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub011:2352045:2352105 [2] NCCL INFO Connected all rings +gpub011:2352045:2352105 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub011:2352045:2352105 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub011:2352045:2352105 [2] NCCL INFO Connected all trees +gpub011:2352045:2352105 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2352045:2352105 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2352045:2352105 [2] NCCL INFO comm 0x5611d88aa8d0 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub011:2352044:2352044 [1] NCCL INFO cudaDriverVersion 12020 +gpub011:2352044:2352044 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:2352044:2352044 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub011:2352044:2352044 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub011:2352044:2352104 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2352044:2352104 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2352044:2352104 [1] NCCL INFO Using network AWS Libfabric +gpub011:2352044:2352104 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:2352044:2352104 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub011:2352044:2352104 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub011:2352044:2352104 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub011:2352044:2352104 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub011:2352044:2352104 [1] NCCL INFO Connected all rings +gpub011:2352044:2352104 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub011:2352044:2352104 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub011:2352044:2352104 [1] NCCL INFO Connected all trees +gpub011:2352044:2352104 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2352044:2352104 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2352044:2352104 [1] NCCL INFO comm 0x55d410f2c6a0 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:2352043:2352107 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub011:2352043:2352107 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub011:2352043:2352107 [0] NCCL INFO Using network AWS Libfabric +gpub011:2352043:2352107 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub011:2352043:2352107 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub011:2352043:2352107 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub011:2352043:2352107 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub011:2352043:2352107 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub011:2352043:2352107 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2352043:2352107 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2352043:2352107 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub011:2352043:2352107 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub011:2352043:2352107 [0] NCCL INFO Connected all rings +gpub011:2352043:2352107 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub011:2352043:2352107 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2352043:2352107 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub011:2352043:2352107 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub011:2352043:2352107 [0] NCCL INFO Connected all trees +gpub011:2352043:2352107 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub011:2352043:2352107 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:2352043:2352107 [0] NCCL INFO comm 0x556276893b30 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub098:1865146:1865146 [2] NCCL INFO cudaDriverVersion 12020 +gpub098:1865146:1865146 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1865146:1865146 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub098:1865146:1865146 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub098:1865146:1865207 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub098:1865146:1865207 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub098:1865146:1865207 [2] NCCL INFO Using network AWS Libfabric +gpub098:1865146:1865207 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub098:1865146:1865207 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub098:1865146:1865207 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub098:1865146:1865207 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub098:1865146:1865207 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub098:1865146:1865207 [2] NCCL INFO Connected all rings +gpub098:1865146:1865207 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub098:1865146:1865207 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub098:1865146:1865207 [2] NCCL INFO Connected all trees +gpub098:1865146:1865207 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub098:1865146:1865207 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1865146:1865207 [2] NCCL INFO comm 0x5603c9eca3f0 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub098:1865145:1865145 [1] NCCL INFO cudaDriverVersion 12020 +gpub098:1865145:1865145 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1865145:1865145 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub098:1865145:1865145 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub098:1865145:1865206 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub098:1865145:1865206 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub098:1865145:1865206 [1] NCCL INFO Using network AWS Libfabric +gpub098:1865145:1865206 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub098:1865145:1865206 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub098:1865145:1865206 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub098:1865145:1865206 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub098:1865145:1865206 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub098:1865145:1865206 [1] NCCL INFO Connected all rings +gpub098:1865145:1865206 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub098:1865145:1865206 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub098:1865145:1865206 [1] NCCL INFO Connected all trees +gpub098:1865145:1865206 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub098:1865145:1865206 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1865145:1865206 [1] NCCL INFO comm 0x55ee8b529330 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub098:1865144:1865144 [0] NCCL INFO cudaDriverVersion 12020 +gpub098:1865144:1865144 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1865144:1865144 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub098:1865144:1865144 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub098:1865144:1865204 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub098:1865144:1865204 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub098:1865144:1865204 [0] NCCL INFO Using network AWS Libfabric +gpub098:1865144:1865204 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub098:1865144:1865204 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub098:1865144:1865204 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub098:1865144:1865204 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub098:1865144:1865204 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub098:1865144:1865204 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub098:1865144:1865204 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub098:1865144:1865204 [0] NCCL INFO Connected all rings +gpub098:1865144:1865204 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub098:1865144:1865204 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub098:1865144:1865204 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub098:1865144:1865204 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub098:1865144:1865204 [0] NCCL INFO Connected all trees +gpub098:1865144:1865204 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub098:1865144:1865204 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1865144:1865204 [0] NCCL INFO comm 0x558641031470 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub042:2862845:2862845 [3] NCCL INFO cudaDriverVersion 12020 +gpub042:2862845:2862845 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:2862845:2862845 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub042:2862845:2862845 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub042:2862845:2862902 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub042:2862845:2862902 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub042:2862845:2862902 [3] NCCL INFO Using network AWS Libfabric +gpub042:2862845:2862902 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub042:2862845:2862902 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub042:2862845:2862902 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub042:2862845:2862902 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub042:2862845:2862902 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub042:2862845:2862902 [3] NCCL INFO Connected all rings +gpub042:2862845:2862902 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub042:2862845:2862902 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub042:2862845:2862902 [3] NCCL INFO Connected all trees +gpub042:2862845:2862902 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub042:2862845:2862902 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:2862845:2862902 [3] NCCL INFO comm 0x561137684d90 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub042:2862843:2862843 [1] NCCL INFO cudaDriverVersion 12020 +gpub042:2862843:2862843 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:2862843:2862843 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub042:2862843:2862843 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub042:2862843:2862900 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub042:2862843:2862900 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub042:2862843:2862900 [1] NCCL INFO Using network AWS Libfabric +gpub042:2862843:2862900 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub042:2862843:2862900 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub042:2862843:2862900 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub042:2862843:2862900 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub042:2862843:2862900 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub042:2862843:2862900 [1] NCCL INFO Connected all rings +gpub042:2862843:2862900 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub042:2862843:2862900 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/AWS Libfabric/1 +gpub042:2862843:2862900 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub042:2862843:2862900 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub042:2862843:2862900 [1] NCCL INFO Connected all trees +gpub042:2862843:2862900 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub042:2862843:2862900 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:2862843:2862900 [1] NCCL INFO comm 0x560712643a50 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub042:2862842:2862842 [0] NCCL INFO cudaDriverVersion 12020 +gpub042:2862842:2862842 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:2862842:2862842 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub042:2862842:2862842 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub042:2862842:2862903 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub042:2862842:2862903 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub042:2862842:2862903 [0] NCCL INFO Using network AWS Libfabric +gpub042:2862842:2862903 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub042:2862842:2862903 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub042:2862842:2862903 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub042:2862842:2862903 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub042:2862842:2862903 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub042:2862842:2862903 [0] NCCL INFO Connected all rings +gpub042:2862842:2862903 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub042:2862842:2862903 [0] NCCL INFO Connected all trees +gpub042:2862842:2862903 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub042:2862842:2862903 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:2862842:2862903 [0] NCCL INFO comm 0x55d6392f0d80 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub098:1865147:1865147 [3] NCCL INFO cudaDriverVersion 12020 +gpub098:1865147:1865147 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1865147:1865147 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub098:1865147:1865147 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub098:1865147:1865205 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub098:1865147:1865205 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub098:1865147:1865205 [3] NCCL INFO Using network AWS Libfabric +gpub098:1865147:1865205 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub098:1865147:1865205 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub098:1865147:1865205 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub098:1865147:1865205 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub098:1865147:1865205 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub098:1865147:1865205 [3] NCCL INFO Connected all rings +gpub098:1865147:1865205 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub098:1865147:1865205 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub098:1865147:1865205 [3] NCCL INFO Connected all trees +gpub098:1865147:1865205 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub098:1865147:1865205 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1865147:1865205 [3] NCCL INFO comm 0x560ec625b210 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub042:2862844:2862844 [2] NCCL INFO cudaDriverVersion 12020 +gpub042:2862844:2862844 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.142<0> +gpub042:2862844:2862844 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub042:2862844:2862844 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub042:2862844:2862901 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub042:2862844:2862901 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub042:2862844:2862901 [2] NCCL INFO Using network AWS Libfabric +gpub042:2862844:2862901 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub042:2862844:2862901 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub042:2862844:2862901 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub042:2862844:2862901 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub042:2862844:2862901 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub042:2862844:2862901 [2] NCCL INFO Connected all rings +gpub042:2862844:2862901 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub042:2862844:2862901 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub042:2862844:2862901 [2] NCCL INFO Connected all trees +gpub042:2862844:2862901 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub042:2862844:2862901 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub042:2862844:2862901 [2] NCCL INFO comm 0x55b48fc49950 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +[gpub011:0/16] 2024-02-05 19:31:47,732 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. +[gpub011:0/16] 2024-02-05 19:33:46,028 (trainer:762) INFO: 29epoch:train:1-100batch: iter_time=1.159, forward_time=0.535, loss_ctc=45.751, loss_att=37.549, acc=0.779, loss=40.010, backward_time=0.330, grad_norm=39.679, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.890e-04, train_time=4.364 +[gpub011:0/16] 2024-02-05 19:36:13,837 (trainer:762) INFO: 29epoch:train:101-200batch: iter_time=9.318e-05, forward_time=0.435, loss_ctc=53.238, loss_att=50.932, acc=0.740, loss=51.624, backward_time=0.361, grad_norm=44.831, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.890e-04, train_time=1.500 +[gpub011:0/16] 2024-02-05 19:38:43,513 (trainer:762) INFO: 29epoch:train:201-300batch: iter_time=8.148e-05, forward_time=0.386, loss_ctc=51.649, loss_att=49.608, acc=0.751, loss=50.220, backward_time=0.324, grad_norm=44.247, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.889e-04, train_time=1.497 +[gpub011:0/16] 2024-02-05 19:40:57,066 (trainer:762) INFO: 
29epoch:train:301-400batch: iter_time=8.546e-05, forward_time=0.315, loss_ctc=61.299, loss_att=52.789, acc=0.731, loss=55.342, backward_time=0.314, grad_norm=53.932, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.889e-04, train_time=1.335 +[gpub011:0/16] 2024-02-05 19:43:54,291 (trainer:762) INFO: 29epoch:train:401-500batch: iter_time=8.279e-05, forward_time=0.415, loss_ctc=45.254, loss_att=47.239, acc=0.734, loss=46.644, backward_time=0.339, grad_norm=40.595, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.889e-04, train_time=1.772 +[gpub011:0/16] 2024-02-05 19:46:32,037 (trainer:762) INFO: 29epoch:train:501-600batch: iter_time=8.024e-05, forward_time=0.323, loss_ctc=48.515, loss_att=43.992, acc=0.745, loss=45.349, backward_time=0.298, grad_norm=44.145, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.889e-04, train_time=1.578 +[gpub011:0/16] 2024-02-05 19:49:28,702 (trainer:762) INFO: 29epoch:train:601-700batch: iter_time=0.001, forward_time=0.453, loss_ctc=54.414, loss_att=48.819, acc=0.751, loss=50.498, backward_time=0.374, grad_norm=39.723, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.889e-04, train_time=1.766 +[gpub011:0/16] 2024-02-05 19:51:52,050 (trainer:762) INFO: 29epoch:train:701-800batch: iter_time=8.334e-05, forward_time=0.298, loss_ctc=53.375, loss_att=51.747, acc=0.733, loss=52.235, backward_time=0.298, grad_norm=39.904, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.888e-04, train_time=1.434 +[gpub011:0/16] 2024-02-05 19:53:42,743 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-05 19:55:04,605 (trainer:762) INFO: 29epoch:train:801-900batch: iter_time=3.502e-04, forward_time=0.408, loss_ctc=61.251, loss_att=53.019, acc=0.747, loss=55.488, backward_time=0.407, grad_norm=48.037, clip=100.000, loss_scale=3.986e+33, optim_step_time=0.107, optim0_lr0=1.888e-04, train_time=1.925 +[gpub011:0/16] 2024-02-05 19:57:13,705 (trainer:762) INFO: 29epoch:train:901-1000batch: iter_time=8.402e-05, forward_time=0.296, loss_ctc=46.817, loss_att=42.474, acc=0.749, loss=43.777, backward_time=0.298, grad_norm=35.655, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.888e-04, train_time=1.291 +[gpub011:0/16] 2024-02-05 20:00:35,310 (trainer:762) INFO: 29epoch:train:1001-1100batch: iter_time=8.275e-05, forward_time=0.450, loss_ctc=47.829, loss_att=46.221, acc=0.755, loss=46.703, backward_time=0.367, grad_norm=36.867, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.105, optim0_lr0=1.888e-04, train_time=2.015 +[gpub011:0/16] 2024-02-05 20:02:32,507 (trainer:762) INFO: 29epoch:train:1101-1200batch: iter_time=7.738e-05, forward_time=0.302, loss_ctc=52.013, loss_att=50.143, acc=0.741, loss=50.704, backward_time=0.301, grad_norm=53.205, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.887e-04, train_time=1.172 +[gpub011:0/16] 2024-02-05 20:04:39,060 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-05 20:04:57,952 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 20:05:01,473 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 20:05:01,473 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-05 20:05:01,478 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 20:12:36,766 (trainer:762) INFO: 29epoch:train:1201-1300batch: iter_time=4.113, forward_time=0.374, loss_ctc=42.747, loss_att=44.605, acc=0.751, loss=44.048, backward_time=0.335, grad_norm=38.387, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.887e-04, train_time=6.043 +[gpub011:0/16] 2024-02-05 20:14:55,316 (trainer:762) INFO: 29epoch:train:1301-1400batch: iter_time=8.422e-05, forward_time=0.401, loss_ctc=46.665, loss_att=43.039, acc=0.757, loss=44.127, backward_time=0.314, grad_norm=41.448, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.887e-04, train_time=1.385 +[gpub011:0/16] 2024-02-05 20:17:18,508 (trainer:762) INFO: 29epoch:train:1401-1500batch: iter_time=8.363e-05, forward_time=0.313, loss_ctc=57.086, loss_att=52.263, acc=0.748, loss=53.710, backward_time=0.307, grad_norm=45.118, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.887e-04, train_time=1.432 +[gpub011:0/16] 2024-02-05 20:20:05,391 (trainer:762) INFO: 
29epoch:train:1501-1600batch: iter_time=8.166e-05, forward_time=0.442, loss_ctc=48.394, loss_att=44.548, acc=0.754, loss=45.702, backward_time=0.341, grad_norm=42.812, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.109, optim0_lr0=1.887e-04, train_time=1.669 +[gpub011:0/16] 2024-02-05 20:22:08,647 (trainer:762) INFO: 29epoch:train:1601-1700batch: iter_time=7.728e-05, forward_time=0.302, loss_ctc=57.273, loss_att=53.901, acc=0.725, loss=54.913, backward_time=0.316, grad_norm=48.533, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.886e-04, train_time=1.232 +[gpub011:0/16] 2024-02-05 20:24:23,813 (trainer:762) INFO: 29epoch:train:1701-1800batch: iter_time=7.791e-05, forward_time=0.346, loss_ctc=39.617, loss_att=33.785, acc=0.768, loss=35.534, backward_time=0.331, grad_norm=40.884, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.886e-04, train_time=1.352 +[gpub011:0/16] 2024-02-05 20:26:44,269 (trainer:762) INFO: 29epoch:train:1801-1900batch: iter_time=4.164e-04, forward_time=0.395, loss_ctc=55.014, loss_att=49.717, acc=0.750, loss=51.306, backward_time=0.324, grad_norm=40.466, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.886e-04, train_time=1.403 +[gpub011:0/16] 2024-02-05 20:29:24,608 (trainer:762) INFO: 29epoch:train:1901-2000batch: iter_time=7.862e-05, forward_time=0.392, loss_ctc=48.087, loss_att=48.554, acc=0.730, loss=48.414, backward_time=0.348, grad_norm=37.874, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.886e-04, train_time=1.604 +[gpub011:0/16] 2024-02-05 20:31:23,820 (trainer:762) INFO: 29epoch:train:2001-2100batch: iter_time=8.088e-05, forward_time=0.296, loss_ctc=57.560, loss_att=47.611, acc=0.754, loss=50.596, backward_time=0.305, grad_norm=42.271, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.885e-04, train_time=1.192 +[gpub011:0/16] 2024-02-05 20:33:26,543 (trainer:762) INFO: 29epoch:train:2101-2200batch: 
iter_time=8.385e-05, forward_time=0.308, loss_ctc=55.734, loss_att=53.950, acc=0.736, loss=54.485, backward_time=0.305, grad_norm=45.241, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.885e-04, train_time=1.226 +[gpub011:0/16] 2024-02-05 20:35:59,940 (trainer:762) INFO: 29epoch:train:2201-2300batch: iter_time=4.490e-04, forward_time=0.411, loss_ctc=45.072, loss_att=43.733, acc=0.754, loss=44.135, backward_time=0.378, grad_norm=36.919, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=1.885e-04, train_time=1.535 +[gpub011:0/16] 2024-02-05 20:38:38,757 (trainer:762) INFO: 29epoch:train:2301-2400batch: iter_time=8.054e-05, forward_time=0.355, loss_ctc=52.275, loss_att=49.663, acc=0.749, loss=50.447, backward_time=0.308, grad_norm=47.989, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.885e-04, train_time=1.587 +[gpub011:0/16] 2024-02-05 20:40:53,532 (trainer:762) INFO: 29epoch:train:2401-2500batch: iter_time=7.819e-05, forward_time=0.301, loss_ctc=42.864, loss_att=46.370, acc=0.728, loss=45.318, backward_time=0.315, grad_norm=36.562, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.885e-04, train_time=1.347 +[gpub011:0/16] 2024-02-05 20:41:13,649 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-05 20:41:32,234 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 20:41:35,639 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 20:41:35,639 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-05 20:41:35,646 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 20:49:45,657 (trainer:762) INFO: 29epoch:train:2501-2600batch: iter_time=3.594, forward_time=0.410, loss_ctc=44.299, loss_att=37.456, acc=0.786, loss=39.509, backward_time=0.315, grad_norm=37.796, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.884e-04, train_time=5.323 +[gpub011:0/16] 2024-02-05 20:52:01,858 (trainer:762) INFO: 29epoch:train:2601-2700batch: iter_time=3.865e-04, forward_time=0.316, loss_ctc=50.795, loss_att=52.446, acc=0.744, loss=51.950, backward_time=0.309, grad_norm=47.588, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.884e-04, train_time=1.362 +[gpub011:0/16] 2024-02-05 20:54:17,712 (trainer:762) INFO: 29epoch:train:2701-2800batch: iter_time=7.781e-05, forward_time=0.310, loss_ctc=49.339, loss_att=47.872, acc=0.765, loss=48.312, backward_time=0.317, grad_norm=42.217, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.884e-04, train_time=1.358 +[gpub011:0/16] 2024-02-05 20:56:50,039 (trainer:762) INFO: 
29epoch:train:2801-2900batch: iter_time=8.228e-05, forward_time=0.390, loss_ctc=56.850, loss_att=50.902, acc=0.741, loss=52.686, backward_time=0.315, grad_norm=51.021, clip=100.000, loss_scale=3.790e+33, optim_step_time=0.111, optim0_lr0=1.884e-04, train_time=1.524 +[gpub011:0/16] 2024-02-05 20:59:16,469 (trainer:762) INFO: 29epoch:train:2901-3000batch: iter_time=8.299e-05, forward_time=0.341, loss_ctc=43.959, loss_att=45.750, acc=0.755, loss=45.213, backward_time=0.298, grad_norm=38.645, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.883e-04, train_time=1.464 +[gpub011:0/16] 2024-02-05 21:01:38,693 (trainer:762) INFO: 29epoch:train:3001-3100batch: iter_time=8.135e-05, forward_time=0.302, loss_ctc=46.555, loss_att=42.559, acc=0.761, loss=43.758, backward_time=0.316, grad_norm=40.314, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.883e-04, train_time=1.422 +[gpub011:0/16] 2024-02-05 21:03:49,714 (trainer:762) INFO: 29epoch:train:3101-3200batch: iter_time=8.230e-05, forward_time=0.373, loss_ctc=52.518, loss_att=48.425, acc=0.759, loss=49.653, backward_time=0.323, grad_norm=39.125, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.883e-04, train_time=1.309 +[gpub011:0/16] 2024-02-05 21:06:17,563 (trainer:762) INFO: 29epoch:train:3201-3300batch: iter_time=8.138e-05, forward_time=0.303, loss_ctc=53.233, loss_att=53.598, acc=0.737, loss=53.489, backward_time=0.302, grad_norm=41.319, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.883e-04, train_time=1.479 +[gpub011:0/16] 2024-02-05 21:08:50,659 (trainer:762) INFO: 29epoch:train:3301-3400batch: iter_time=8.354e-05, forward_time=0.309, loss_ctc=57.701, loss_att=51.359, acc=0.760, loss=53.261, backward_time=0.313, grad_norm=46.479, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.883e-04, train_time=1.531 +[gpub011:0/16] 2024-02-05 21:10:46,374 (trainer:762) INFO: 29epoch:train:3401-3500batch: 
iter_time=7.752e-05, forward_time=0.306, loss_ctc=46.233, loss_att=41.963, acc=0.757, loss=43.244, backward_time=0.310, grad_norm=37.833, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.882e-04, train_time=1.157 +[gpub011:0/16] 2024-02-05 21:13:40,479 (trainer:762) INFO: 29epoch:train:3501-3600batch: iter_time=1.954e-04, forward_time=0.397, loss_ctc=47.493, loss_att=45.000, acc=0.767, loss=45.748, backward_time=0.311, grad_norm=36.052, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.882e-04, train_time=1.740 +[gpub011:0/16] 2024-02-05 21:16:06,146 (trainer:762) INFO: 29epoch:train:3601-3700batch: iter_time=8.678e-05, forward_time=0.313, loss_ctc=49.532, loss_att=49.807, acc=0.749, loss=49.724, backward_time=0.302, grad_norm=46.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.882e-04, train_time=1.458 +[gpub011:0/16] 2024-02-05 21:17:32,317 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub011:0/16] 2024-02-05 21:17:51,006 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 21:17:54,490 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 21:17:54,491 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-05 21:17:54,496 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 21:24:29,205 
(trainer:762) INFO: 29epoch:train:3701-3800batch: iter_time=3.694, forward_time=0.312, loss_ctc=42.326, loss_att=43.918, acc=0.763, loss=43.440, backward_time=0.305, grad_norm=35.945, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.882e-04, train_time=5.030 +[gpub011:0/16] 2024-02-05 21:26:42,865 (trainer:762) INFO: 29epoch:train:3801-3900batch: iter_time=7.249e-05, forward_time=0.378, loss_ctc=44.978, loss_att=45.082, acc=0.756, loss=45.051, backward_time=0.322, grad_norm=40.714, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.881e-04, train_time=1.336 +[gpub011:0/16] 2024-02-05 21:29:13,899 (trainer:762) INFO: 29epoch:train:3901-4000batch: iter_time=7.898e-05, forward_time=0.315, loss_ctc=54.695, loss_att=51.889, acc=0.761, loss=52.731, backward_time=0.319, grad_norm=50.014, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.881e-04, train_time=1.510 +[gpub011:0/16] 2024-02-05 21:31:20,611 (trainer:762) INFO: 29epoch:train:4001-4100batch: iter_time=8.106e-05, forward_time=0.323, loss_ctc=47.141, loss_att=43.964, acc=0.761, loss=44.917, backward_time=0.307, grad_norm=41.889, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.881e-04, train_time=1.267 +[gpub011:0/16] 2024-02-05 21:33:22,276 (trainer:762) INFO: 29epoch:train:4101-4200batch: iter_time=8.360e-05, forward_time=0.307, loss_ctc=56.067, loss_att=53.654, acc=0.738, loss=54.378, backward_time=0.307, grad_norm=46.678, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.881e-04, train_time=1.217 +[gpub011:0/16] 2024-02-05 21:36:01,446 (trainer:762) INFO: 29epoch:train:4201-4300batch: iter_time=8.149e-05, forward_time=0.348, loss_ctc=38.681, loss_att=34.063, acc=0.775, loss=35.448, backward_time=0.312, grad_norm=40.355, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.881e-04, train_time=1.591 +[gpub011:0/16] 2024-02-05 21:38:08,288 (trainer:762) INFO: 
29epoch:train:4301-4400batch: iter_time=7.794e-05, forward_time=0.320, loss_ctc=54.505, loss_att=49.096, acc=0.763, loss=50.718, backward_time=0.312, grad_norm=39.779, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.880e-04, train_time=1.267 +[gpub011:0/16] 2024-02-05 21:40:32,875 (trainer:762) INFO: 29epoch:train:4401-4500batch: iter_time=8.090e-05, forward_time=0.306, loss_ctc=47.115, loss_att=50.424, acc=0.735, loss=49.431, backward_time=0.309, grad_norm=37.841, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.880e-04, train_time=1.446 +[gpub011:0/16] 2024-02-05 21:42:35,366 (trainer:762) INFO: 29epoch:train:4501-4600batch: iter_time=7.978e-05, forward_time=0.309, loss_ctc=55.361, loss_att=47.285, acc=0.762, loss=49.708, backward_time=0.311, grad_norm=41.280, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.880e-04, train_time=1.225 +[gpub011:0/16] 2024-02-05 21:44:51,511 (trainer:762) INFO: 29epoch:train:4601-4700batch: iter_time=7.930e-05, forward_time=0.397, loss_ctc=54.412, loss_att=53.393, acc=0.747, loss=53.698, backward_time=0.321, grad_norm=46.547, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.880e-04, train_time=1.361 +[gpub011:0/16] 2024-02-05 21:47:24,194 (trainer:762) INFO: 29epoch:train:4701-4800batch: iter_time=8.062e-05, forward_time=0.298, loss_ctc=44.240, loss_att=44.010, acc=0.758, loss=44.079, backward_time=0.301, grad_norm=36.087, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.879e-04, train_time=1.526 +[gpub011:0/16] 2024-02-05 21:49:27,081 (trainer:762) INFO: 29epoch:train:4801-4900batch: iter_time=8.036e-05, forward_time=0.321, loss_ctc=51.272, loss_att=48.691, acc=0.763, loss=49.465, backward_time=0.307, grad_norm=43.480, clip=100.000, loss_scale=7.581e+33, optim_step_time=0.098, optim0_lr0=1.879e-04, train_time=1.229 +[gpub011:0/16] 2024-02-05 21:51:33,484 (trainer:762) INFO: 29epoch:train:4901-5000batch: 
iter_time=8.072e-05, forward_time=0.330, loss_ctc=42.188, loss_att=45.504, acc=0.746, loss=44.509, backward_time=0.307, grad_norm=35.619, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.879e-04, train_time=1.264 +[gpub011:0/16] 2024-02-05 21:51:53,513 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub011:0/16] 2024-02-05 21:52:12,309 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 21:52:15,760 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 21:52:15,760 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-05 21:52:15,803 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 22:00:33,559 (trainer:762) INFO: 29epoch:train:5001-5100batch: iter_time=3.791, forward_time=0.381, loss_ctc=43.081, loss_att=36.918, acc=0.788, loss=38.767, backward_time=0.306, grad_norm=37.478, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.879e-04, train_time=5.400 +[gpub011:0/16] 2024-02-05 22:02:37,359 (trainer:762) INFO: 29epoch:train:5101-5200batch: iter_time=8.259e-05, forward_time=0.325, loss_ctc=50.757, loss_att=50.522, acc=0.748, loss=50.592, backward_time=0.303, grad_norm=44.214, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.879e-04, train_time=1.238 +[gpub011:0/16] 2024-02-05 22:04:57,660 
(trainer:762) INFO: 29epoch:train:5201-5300batch: iter_time=8.389e-05, forward_time=0.316, loss_ctc=48.861, loss_att=47.035, acc=0.767, loss=47.583, backward_time=0.310, grad_norm=41.259, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.878e-04, train_time=1.403 +[gpub011:0/16] 2024-02-05 22:07:40,583 (trainer:762) INFO: 29epoch:train:5301-5400batch: iter_time=8.887e-05, forward_time=0.389, loss_ctc=56.147, loss_att=50.636, acc=0.744, loss=52.289, backward_time=0.318, grad_norm=52.913, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=1.878e-04, train_time=1.629 +[gpub011:0/16] 2024-02-05 22:09:42,878 (trainer:762) INFO: 29epoch:train:5401-5500batch: iter_time=7.953e-05, forward_time=0.316, loss_ctc=43.839, loss_att=45.067, acc=0.758, loss=44.698, backward_time=0.307, grad_norm=37.677, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=1.878e-04, train_time=1.222 +[gpub011:0/16] 2024-02-05 22:11:40,118 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-05 22:11:56,172 (trainer:762) INFO: 29epoch:train:5501-5600batch: iter_time=8.661e-05, forward_time=0.314, loss_ctc=45.882, loss_att=42.393, acc=0.763, loss=43.440, backward_time=0.309, grad_norm=40.214, clip=100.000, loss_scale=9.598e+33, optim_step_time=0.097, optim0_lr0=1.878e-04, train_time=1.333 +[gpub011:0/16] 2024-02-05 22:14:43,908 (trainer:762) INFO: 29epoch:train:5601-5700batch: iter_time=8.142e-05, forward_time=0.422, loss_ctc=52.150, loss_att=47.617, acc=0.761, loss=48.977, backward_time=0.337, grad_norm=41.111, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.877e-04, train_time=1.677 +[gpub011:0/16] 2024-02-05 22:16:42,092 (trainer:762) INFO: 29epoch:train:5701-5800batch: iter_time=8.127e-05, forward_time=0.322, loss_ctc=52.581, loss_att=53.197, acc=0.737, loss=53.012, backward_time=0.308, grad_norm=38.104, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.877e-04, train_time=1.182 +[gpub011:0/16] 2024-02-05 22:18:58,057 (trainer:762) INFO: 29epoch:train:5801-5900batch: iter_time=8.265e-05, forward_time=0.323, loss_ctc=55.838, loss_att=50.927, acc=0.761, loss=52.400, backward_time=0.311, grad_norm=43.877, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.877e-04, train_time=1.359 +[gpub011:0/16] 2024-02-05 22:21:23,985 (trainer:762) INFO: 29epoch:train:5901-6000batch: iter_time=8.585e-05, forward_time=0.383, loss_ctc=46.025, loss_att=41.441, acc=0.759, loss=42.816, backward_time=0.326, grad_norm=37.891, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.877e-04, train_time=1.460 +[gpub011:0/16] 2024-02-05 22:23:58,099 (trainer:762) INFO: 29epoch:train:6001-6100batch: iter_time=8.938e-05, forward_time=0.324, loss_ctc=46.535, loss_att=44.275, acc=0.771, loss=44.953, backward_time=0.305, grad_norm=35.440, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.877e-04, train_time=1.541 +[gpub011:0/16] 2024-02-05 22:26:17,218 
(trainer:762) INFO: 29epoch:train:6101-6200batch: iter_time=9.558e-05, forward_time=0.300, loss_ctc=49.477, loss_att=49.699, acc=0.750, loss=49.633, backward_time=0.327, grad_norm=45.999, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.876e-04, train_time=1.391 +[gpub011:0/16] 2024-02-05 22:27:33,748 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub011:0/16] 2024-02-05 22:27:52,483 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 22:27:55,954 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 22:27:55,954 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-05 22:27:55,959 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 22:35:14,067 (trainer:762) INFO: 29epoch:train:6201-6300batch: iter_time=3.843, forward_time=0.365, loss_ctc=42.054, loss_att=43.445, acc=0.765, loss=43.028, backward_time=0.309, grad_norm=36.742, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.876e-04, train_time=5.369 +[gpub011:0/16] 2024-02-05 22:37:11,090 (trainer:762) INFO: 29epoch:train:6301-6400batch: iter_time=6.171e-04, forward_time=0.349, loss_ctc=44.908, loss_att=44.767, acc=0.760, loss=44.810, backward_time=0.308, grad_norm=42.784, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.876e-04, 
train_time=1.170 +[gpub011:0/16] 2024-02-05 22:39:38,848 (trainer:762) INFO: 29epoch:train:6401-6500batch: iter_time=7.985e-05, forward_time=0.308, loss_ctc=53.465, loss_att=51.253, acc=0.763, loss=51.916, backward_time=0.310, grad_norm=44.629, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.876e-04, train_time=1.477 +[gpub011:0/16] 2024-02-05 22:42:05,166 (trainer:762) INFO: 29epoch:train:6501-6600batch: iter_time=7.910e-05, forward_time=0.382, loss_ctc=47.495, loss_att=44.082, acc=0.762, loss=45.106, backward_time=0.310, grad_norm=41.913, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.120, optim0_lr0=1.875e-04, train_time=1.463 +[gpub011:0/16] 2024-02-05 22:44:11,666 (trainer:762) INFO: 29epoch:train:6601-6700batch: iter_time=7.900e-05, forward_time=0.306, loss_ctc=55.346, loss_att=52.601, acc=0.740, loss=53.424, backward_time=0.308, grad_norm=48.015, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.875e-04, train_time=1.265 +[gpub011:0/16] 2024-02-05 22:46:56,780 (trainer:762) INFO: 29epoch:train:6701-6800batch: iter_time=7.975e-05, forward_time=0.318, loss_ctc=37.901, loss_att=33.819, acc=0.778, loss=35.044, backward_time=0.309, grad_norm=39.459, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.875e-04, train_time=1.651 +[gpub011:0/16] 2024-02-05 22:48:59,910 (trainer:762) INFO: 29epoch:train:6801-6900batch: iter_time=7.903e-05, forward_time=0.293, loss_ctc=54.344, loss_att=48.696, acc=0.763, loss=50.390, backward_time=0.300, grad_norm=41.396, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.875e-04, train_time=1.231 +[gpub011:0/16] 2024-02-05 22:51:15,562 (trainer:762) INFO: 29epoch:train:6901-7000batch: iter_time=2.974e-04, forward_time=0.379, loss_ctc=47.196, loss_att=50.571, acc=0.737, loss=49.558, backward_time=0.316, grad_norm=36.857, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.109, optim0_lr0=1.875e-04, train_time=1.356 +[gpub011:0/16] 2024-02-05 
22:53:56,834 (trainer:762) INFO: 29epoch:train:7001-7100batch: iter_time=7.964e-05, forward_time=0.321, loss_ctc=55.191, loss_att=47.023, acc=0.764, loss=49.473, backward_time=0.320, grad_norm=40.713, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.874e-04, train_time=1.613 +[gpub011:0/16] 2024-02-05 22:55:59,702 (trainer:762) INFO: 29epoch:train:7101-7200batch: iter_time=7.890e-05, forward_time=0.307, loss_ctc=53.785, loss_att=52.542, acc=0.750, loss=52.915, backward_time=0.310, grad_norm=41.416, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.874e-04, train_time=1.228 +[gpub011:0/16] 2024-02-05 22:58:32,712 (trainer:762) INFO: 29epoch:train:7201-7300batch: iter_time=7.733e-05, forward_time=0.369, loss_ctc=44.287, loss_att=44.065, acc=0.759, loss=44.132, backward_time=0.306, grad_norm=36.139, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.874e-04, train_time=1.530 +[gpub011:0/16] 2024-02-05 23:00:47,191 (trainer:762) INFO: 29epoch:train:7301-7400batch: iter_time=4.982e-04, forward_time=0.313, loss_ctc=51.196, loss_att=48.816, acc=0.764, loss=49.530, backward_time=0.302, grad_norm=44.667, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.874e-04, train_time=1.345 +[gpub011:0/16] 2024-02-05 23:02:47,274 (trainer:762) INFO: 29epoch:train:7401-7500batch: iter_time=7.825e-05, forward_time=0.317, loss_ctc=41.967, loss_att=45.259, acc=0.747, loss=44.271, backward_time=0.310, grad_norm=34.889, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.873e-04, train_time=1.201 +[gpub011:0/16] 2024-02-05 23:03:07,303 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-05 23:03:26,467 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 23:03:30,178 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 23:03:30,178 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-05 23:03:30,182 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 23:11:10,270 (trainer:762) INFO: 29epoch:train:7501-7600batch: iter_time=3.702, forward_time=0.355, loss_ctc=43.004, loss_att=37.907, acc=0.786, loss=39.436, backward_time=0.314, grad_norm=37.104, clip=100.000, loss_scale=5.971e+33, optim_step_time=0.099, optim0_lr0=1.873e-04, train_time=5.030 +[gpub011:0/16] 2024-02-05 23:13:27,523 (trainer:762) INFO: 29epoch:train:7601-7700batch: iter_time=8.325e-05, forward_time=0.317, loss_ctc=49.778, loss_att=50.457, acc=0.743, loss=50.253, backward_time=0.317, grad_norm=43.960, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.873e-04, train_time=1.372 +[gpub011:0/16] 2024-02-05 23:15:58,902 (trainer:762) INFO: 29epoch:train:7701-7800batch: iter_time=7.850e-05, forward_time=0.389, loss_ctc=47.109, loss_att=48.355, acc=0.756, loss=47.981, backward_time=0.324, grad_norm=42.352, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.873e-04, train_time=1.513 +[gpub011:0/16] 2024-02-05 23:18:13,826 (trainer:762) INFO: 
29epoch:train:7801-7900batch: iter_time=3.912e-04, forward_time=0.346, loss_ctc=55.129, loss_att=49.921, acc=0.744, loss=51.483, backward_time=0.307, grad_norm=47.168, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.873e-04, train_time=1.349 +[gpub011:0/16] 2024-02-05 23:20:54,570 (trainer:762) INFO: 29epoch:train:7901-8000batch: iter_time=7.849e-05, forward_time=0.314, loss_ctc=43.506, loss_att=46.221, acc=0.742, loss=45.407, backward_time=0.304, grad_norm=40.563, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.872e-04, train_time=1.607 +[gpub011:0/16] 2024-02-05 23:23:16,745 (trainer:762) INFO: 29epoch:train:8001-8100batch: iter_time=1.157e-04, forward_time=0.376, loss_ctc=45.575, loss_att=43.107, acc=0.751, loss=43.848, backward_time=0.326, grad_norm=43.303, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=1.872e-04, train_time=1.422 +[gpub011:0/16] 2024-02-05 23:25:21,767 (trainer:762) INFO: 29epoch:train:8101-8200batch: iter_time=8.110e-05, forward_time=0.320, loss_ctc=52.001, loss_att=47.944, acc=0.754, loss=49.161, backward_time=0.314, grad_norm=39.658, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.872e-04, train_time=1.250 +[gpub011:0/16] 2024-02-05 23:27:17,043 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-05 23:28:11,099 (trainer:762) INFO: 29epoch:train:8201-8300batch: iter_time=8.805e-05, forward_time=0.361, loss_ctc=51.963, loss_att=51.134, acc=0.739, loss=51.383, backward_time=0.312, grad_norm=38.608, clip=100.000, loss_scale=8.339e+33, optim_step_time=0.112, optim0_lr0=1.872e-04, train_time=1.693 +[gpub011:0/16] 2024-02-05 23:30:12,866 (trainer:762) INFO: 29epoch:train:8301-8400batch: iter_time=8.946e-04, forward_time=0.337, loss_ctc=55.970, loss_att=51.663, acc=0.752, loss=52.955, backward_time=0.314, grad_norm=47.188, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.872e-04, train_time=1.218 +[gpub011:0/16] 2024-02-05 23:32:38,195 (trainer:762) INFO: 29epoch:train:8401-8500batch: iter_time=7.758e-05, forward_time=0.354, loss_ctc=45.842, loss_att=42.193, acc=0.752, loss=43.288, backward_time=0.308, grad_norm=37.459, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.871e-04, train_time=1.453 +[gpub011:0/16] 2024-02-05 23:35:14,606 (trainer:762) INFO: 29epoch:train:8501-8600batch: iter_time=7.953e-05, forward_time=0.392, loss_ctc=46.118, loss_att=45.249, acc=0.759, loss=45.510, backward_time=0.319, grad_norm=37.361, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.871e-04, train_time=1.564 +[gpub011:0/16] 2024-02-05 23:37:39,313 (trainer:762) INFO: 29epoch:train:8601-8700batch: iter_time=8.562e-05, forward_time=0.334, loss_ctc=48.053, loss_att=49.141, acc=0.742, loss=48.815, backward_time=0.326, grad_norm=47.951, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.871e-04, train_time=1.448 +[gpub011:0/16] 2024-02-05 23:39:03,278 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub011:0/16] 2024-02-05 23:39:22,165 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-05 23:39:25,698 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-05 23:39:25,698 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-05 23:39:25,749 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-05 23:46:50,827 (trainer:762) INFO: 29epoch:train:8701-8800batch: iter_time=4.071, forward_time=0.392, loss_ctc=41.394, loss_att=43.817, acc=0.760, loss=43.090, backward_time=0.307, grad_norm=34.378, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.871e-04, train_time=5.515 +[gpub011:0/16] 2024-02-05 23:48:54,503 (trainer:762) INFO: 29epoch:train:8801-8900batch: iter_time=7.855e-05, forward_time=0.293, loss_ctc=44.527, loss_att=46.084, acc=0.757, loss=45.617, backward_time=0.308, grad_norm=41.463, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.870e-04, train_time=1.236 +[gpub011:0/16] 2024-02-05 23:51:38,880 (trainer:762) INFO: 29epoch:train:8901-9000batch: iter_time=8.104e-05, forward_time=0.317, loss_ctc=53.163, loss_att=51.876, acc=0.763, loss=52.262, backward_time=0.316, grad_norm=44.824, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.870e-04, train_time=1.644 +[gpub011:0/16] 2024-02-05 23:53:49,580 (trainer:762) INFO: 
29epoch:train:9001-9100batch: iter_time=8.036e-05, forward_time=0.393, loss_ctc=46.171, loss_att=43.211, acc=0.764, loss=44.099, backward_time=0.309, grad_norm=41.211, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.870e-04, train_time=1.307 +[gpub011:0/16] 2024-02-05 23:56:18,902 (trainer:762) INFO: 29epoch:train:9101-9200batch: iter_time=8.215e-05, forward_time=0.326, loss_ctc=54.588, loss_att=52.269, acc=0.743, loss=52.964, backward_time=0.302, grad_norm=45.987, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.870e-04, train_time=1.492 +[gpub011:0/16] 2024-02-05 23:58:38,685 (trainer:762) INFO: 29epoch:train:9201-9300batch: iter_time=7.927e-05, forward_time=0.339, loss_ctc=37.674, loss_att=33.793, acc=0.780, loss=34.958, backward_time=0.307, grad_norm=36.169, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.870e-04, train_time=1.398 +[gpub011:0/16] 2024-02-06 00:01:04,541 (trainer:762) INFO: 29epoch:train:9301-9400batch: iter_time=8.076e-05, forward_time=0.353, loss_ctc=53.533, loss_att=47.893, acc=0.766, loss=49.585, backward_time=0.330, grad_norm=41.661, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.869e-04, train_time=1.458 +[gpub011:0/16] 2024-02-06 00:03:20,441 (trainer:762) INFO: 29epoch:train:9401-9500batch: iter_time=4.183e-04, forward_time=0.326, loss_ctc=46.323, loss_att=50.111, acc=0.737, loss=48.974, backward_time=0.300, grad_norm=43.740, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.869e-04, train_time=1.358 +[gpub011:0/16] 2024-02-06 00:06:04,457 (trainer:762) INFO: 29epoch:train:9501-9600batch: iter_time=8.122e-05, forward_time=0.356, loss_ctc=54.178, loss_att=46.818, acc=0.764, loss=49.026, backward_time=0.349, grad_norm=40.403, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.869e-04, train_time=1.641 +[gpub011:0/16] 2024-02-06 00:08:08,169 (trainer:762) INFO: 29epoch:train:9601-9700batch: 
iter_time=8.295e-05, forward_time=0.310, loss_ctc=54.230, loss_att=51.610, acc=0.754, loss=52.396, backward_time=0.312, grad_norm=45.093, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.869e-04, train_time=1.237 +[gpub011:0/16] 2024-02-06 00:10:36,949 (trainer:762) INFO: 29epoch:train:9701-9800batch: iter_time=8.025e-05, forward_time=0.309, loss_ctc=43.982, loss_att=43.824, acc=0.761, loss=43.871, backward_time=0.318, grad_norm=36.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.868e-04, train_time=1.487 +[gpub011:0/16] 2024-02-06 00:12:59,171 (trainer:762) INFO: 29epoch:train:9801-9900batch: iter_time=8.366e-05, forward_time=0.352, loss_ctc=50.916, loss_att=48.540, acc=0.764, loss=49.253, backward_time=0.323, grad_norm=46.552, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.868e-04, train_time=1.422 +[gpub011:0/16] 2024-02-06 00:13:58,719 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 00:15:13,753 (trainer:762) INFO: 29epoch:train:9901-10000batch: iter_time=7.952e-05, forward_time=0.311, loss_ctc=41.817, loss_att=46.371, acc=0.743, loss=45.005, backward_time=0.304, grad_norm=36.762, clip=100.000, loss_scale=3.566e+33, optim_step_time=0.094, optim0_lr0=1.868e-04, train_time=1.346 +[gpub011:0/16] 2024-02-06 00:15:33,841 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub011:0/16] 2024-02-06 00:15:53,021 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 00:15:56,489 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 00:15:56,489 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-06 00:15:56,614 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 00:23:41,947 (trainer:762) INFO: 29epoch:train:10001-10100batch: iter_time=3.824, forward_time=0.392, loss_ctc=42.940, loss_att=37.264, acc=0.787, loss=38.967, backward_time=0.318, grad_norm=38.044, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.868e-04, train_time=5.082 +[gpub011:0/16] 2024-02-06 00:26:04,158 (trainer:762) INFO: 29epoch:train:10101-10200batch: iter_time=8.671e-05, forward_time=0.291, loss_ctc=49.539, loss_att=49.680, acc=0.746, loss=49.638, backward_time=0.295, grad_norm=43.794, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.868e-04, train_time=1.422 +[gpub011:0/16] 2024-02-06 00:28:44,124 (trainer:762) INFO: 29epoch:train:10201-10300batch: iter_time=8.485e-05, forward_time=0.408, loss_ctc=46.790, loss_att=48.178, acc=0.757, loss=47.762, backward_time=0.328, grad_norm=41.689, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.108, optim0_lr0=1.867e-04, train_time=1.599 +[gpub011:0/16] 2024-02-06 00:30:47,026 (trainer:762) 
INFO: 29epoch:train:10301-10400batch: iter_time=7.751e-05, forward_time=0.294, loss_ctc=54.799, loss_att=49.031, acc=0.745, loss=50.761, backward_time=0.298, grad_norm=46.957, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.867e-04, train_time=1.229 +[gpub011:0/16] 2024-02-06 00:33:24,827 (trainer:762) INFO: 29epoch:train:10401-10500batch: iter_time=7.707e-05, forward_time=0.288, loss_ctc=43.163, loss_att=45.332, acc=0.744, loss=44.682, backward_time=0.292, grad_norm=38.590, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.867e-04, train_time=1.577 +[gpub011:0/16] 2024-02-06 00:35:39,918 (trainer:762) INFO: 29epoch:train:10501-10600batch: iter_time=1.286e-04, forward_time=0.379, loss_ctc=45.211, loss_att=42.591, acc=0.754, loss=43.377, backward_time=0.341, grad_norm=41.561, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.867e-04, train_time=1.351 +[gpub011:0/16] 2024-02-06 00:38:16,578 (trainer:762) INFO: 29epoch:train:10601-10700batch: iter_time=7.619e-05, forward_time=0.325, loss_ctc=52.154, loss_att=47.566, acc=0.754, loss=48.942, backward_time=0.313, grad_norm=39.208, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.867e-04, train_time=1.566 +[gpub011:0/16] 2024-02-06 00:40:55,135 (trainer:762) INFO: 29epoch:train:10701-10800batch: iter_time=7.756e-05, forward_time=0.291, loss_ctc=52.056, loss_att=50.634, acc=0.742, loss=51.061, backward_time=0.295, grad_norm=38.100, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.866e-04, train_time=1.586 +[gpub011:0/16] 2024-02-06 00:43:13,336 (trainer:762) INFO: 29epoch:train:10801-10900batch: iter_time=1.821e-04, forward_time=0.363, loss_ctc=55.232, loss_att=51.038, acc=0.754, loss=52.296, backward_time=0.302, grad_norm=46.326, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.866e-04, train_time=1.380 +[gpub011:0/16] 2024-02-06 00:45:53,350 (trainer:762) INFO: 
29epoch:train:10901-11000batch: iter_time=7.856e-05, forward_time=0.388, loss_ctc=46.214, loss_att=42.149, acc=0.753, loss=43.369, backward_time=0.331, grad_norm=37.833, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=1.866e-04, train_time=1.602 +[gpub011:0/16] 2024-02-06 00:48:10,540 (trainer:762) INFO: 29epoch:train:11001-11100batch: iter_time=7.872e-05, forward_time=0.291, loss_ctc=46.445, loss_att=44.794, acc=0.761, loss=45.289, backward_time=0.297, grad_norm=36.886, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.866e-04, train_time=1.372 +[gpub011:0/16] 2024-02-06 00:50:40,220 (trainer:762) INFO: 29epoch:train:11101-11200batch: iter_time=8.025e-05, forward_time=0.300, loss_ctc=47.796, loss_att=49.106, acc=0.743, loss=48.713, backward_time=0.300, grad_norm=45.076, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.865e-04, train_time=1.497 +[gpub011:0/16] 2024-02-06 00:52:12,344 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-06 00:52:31,673 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 00:52:35,255 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 00:52:35,255 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-06 00:52:35,262 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 00:59:24,973 (trainer:762) INFO: 29epoch:train:11201-11300batch: iter_time=3.770, forward_time=0.408, loss_ctc=41.499, loss_att=43.718, acc=0.760, loss=43.052, backward_time=0.334, grad_norm=35.480, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.865e-04, train_time=5.247 +[gpub011:0/16] 2024-02-06 01:01:50,433 (trainer:762) INFO: 29epoch:train:11301-11400batch: iter_time=8.236e-05, forward_time=0.333, loss_ctc=44.034, loss_att=45.356, acc=0.761, loss=44.959, backward_time=0.300, grad_norm=41.090, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.865e-04, train_time=1.454 +[gpub011:0/16] 2024-02-06 01:04:07,740 (trainer:762) INFO: 29epoch:train:11401-11500batch: iter_time=8.165e-05, forward_time=0.423, loss_ctc=52.435, loss_att=51.778, acc=0.763, loss=51.976, backward_time=0.328, grad_norm=46.307, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.865e-04, train_time=1.372 +[gpub011:0/16] 2024-02-06 01:06:43,013 (trainer:762) 
INFO: 29epoch:train:11501-11600batch: iter_time=8.172e-05, forward_time=0.292, loss_ctc=46.261, loss_att=43.696, acc=0.764, loss=44.466, backward_time=0.298, grad_norm=40.296, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.865e-04, train_time=1.553 +[gpub011:0/16] 2024-02-06 01:09:09,543 (trainer:762) INFO: 29epoch:train:11601-11700batch: iter_time=8.088e-05, forward_time=0.441, loss_ctc=54.335, loss_att=52.947, acc=0.741, loss=53.363, backward_time=0.344, grad_norm=45.863, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.106, optim0_lr0=1.864e-04, train_time=1.465 +[gpub011:0/16] 2024-02-06 01:11:32,856 (trainer:762) INFO: 29epoch:train:11701-11800batch: iter_time=8.433e-05, forward_time=0.287, loss_ctc=37.064, loss_att=33.578, acc=0.779, loss=34.624, backward_time=0.292, grad_norm=37.433, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.864e-04, train_time=1.431 +[gpub011:0/16] 2024-02-06 01:13:30,230 (trainer:762) INFO: 29epoch:train:11801-11900batch: iter_time=2.106e-04, forward_time=0.300, loss_ctc=54.024, loss_att=48.609, acc=0.765, loss=50.234, backward_time=0.310, grad_norm=39.844, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.864e-04, train_time=1.175 +[gpub011:0/16] 2024-02-06 01:15:55,542 (trainer:762) INFO: 29epoch:train:11901-12000batch: iter_time=7.994e-05, forward_time=0.397, loss_ctc=46.318, loss_att=50.137, acc=0.737, loss=48.991, backward_time=0.341, grad_norm=37.311, clip=100.000, loss_scale=4.206e+33, optim_step_time=0.098, optim0_lr0=1.864e-04, train_time=1.452 +[gpub011:0/16] 2024-02-06 01:18:13,582 (trainer:762) INFO: 29epoch:train:12001-12100batch: iter_time=8.039e-05, forward_time=0.291, loss_ctc=54.197, loss_att=47.036, acc=0.764, loss=49.184, backward_time=0.296, grad_norm=40.089, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.863e-04, train_time=1.381 +[gpub011:0/16] 2024-02-06 01:20:39,036 (trainer:762) INFO: 
29epoch:train:12101-12200batch: iter_time=9.028e-05, forward_time=0.390, loss_ctc=53.139, loss_att=52.619, acc=0.751, loss=52.775, backward_time=0.356, grad_norm=43.974, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.863e-04, train_time=1.454 +[gpub011:0/16] 2024-02-06 01:22:57,867 (trainer:762) INFO: 29epoch:train:12201-12300batch: iter_time=8.529e-05, forward_time=0.289, loss_ctc=43.670, loss_att=43.686, acc=0.760, loss=43.681, backward_time=0.294, grad_norm=36.119, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.863e-04, train_time=1.388 +[gpub011:0/16] 2024-02-06 01:25:05,238 (trainer:762) INFO: 29epoch:train:12301-12400batch: iter_time=2.072e-04, forward_time=0.301, loss_ctc=49.643, loss_att=47.576, acc=0.766, loss=48.196, backward_time=0.302, grad_norm=41.180, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.863e-04, train_time=1.274 +[gpub011:0/16] 2024-02-06 01:27:44,969 (trainer:762) INFO: 29epoch:train:12401-12500batch: iter_time=8.539e-05, forward_time=0.367, loss_ctc=41.485, loss_att=45.463, acc=0.748, loss=44.270, backward_time=0.380, grad_norm=36.565, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.863e-04, train_time=1.597 +[gpub011:0/16] 2024-02-06 01:28:04,997 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-06 01:28:24,349 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 01:28:28,187 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 01:28:28,187 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-06 01:28:28,190 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 01:36:08,979 (trainer:762) INFO: 29epoch:train:12501-12600batch: iter_time=3.566, forward_time=0.287, loss_ctc=42.791, loss_att=36.737, acc=0.788, loss=38.553, backward_time=0.292, grad_norm=36.920, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.862e-04, train_time=5.040 +[gpub011:0/16] 2024-02-06 01:38:49,419 (trainer:762) INFO: 29epoch:train:12601-12700batch: iter_time=8.649e-05, forward_time=0.401, loss_ctc=49.693, loss_att=48.866, acc=0.747, loss=49.114, backward_time=0.348, grad_norm=42.743, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.862e-04, train_time=1.604 +[gpub011:0/16] 2024-02-06 01:41:11,441 (trainer:762) INFO: 29epoch:train:12701-12800batch: iter_time=8.065e-05, forward_time=0.291, loss_ctc=47.113, loss_att=47.814, acc=0.760, loss=47.604, backward_time=0.296, grad_norm=42.981, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.862e-04, train_time=1.419 +[gpub011:0/16] 2024-02-06 01:43:32,612 (trainer:762) 
INFO: 29epoch:train:12801-12900batch: iter_time=1.814e-04, forward_time=0.311, loss_ctc=54.538, loss_att=49.952, acc=0.742, loss=51.328, backward_time=0.301, grad_norm=50.652, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.862e-04, train_time=1.412 +[gpub011:0/16] 2024-02-06 01:46:11,851 (trainer:762) INFO: 29epoch:train:12901-13000batch: iter_time=8.486e-05, forward_time=0.357, loss_ctc=43.274, loss_att=45.724, acc=0.742, loss=44.989, backward_time=0.358, grad_norm=37.844, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.862e-04, train_time=1.592 +[gpub011:0/16] 2024-02-06 01:48:33,270 (trainer:762) INFO: 29epoch:train:13001-13100batch: iter_time=8.281e-05, forward_time=0.306, loss_ctc=44.567, loss_att=42.460, acc=0.754, loss=43.092, backward_time=0.293, grad_norm=40.679, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.861e-04, train_time=1.414 +[gpub011:0/16] 2024-02-06 01:50:43,900 (trainer:762) INFO: 29epoch:train:13101-13200batch: iter_time=8.608e-05, forward_time=0.293, loss_ctc=51.508, loss_att=46.503, acc=0.759, loss=48.005, backward_time=0.298, grad_norm=38.386, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.861e-04, train_time=1.306 +[gpub011:0/16] 2024-02-06 01:53:56,378 (trainer:762) INFO: 29epoch:train:13201-13300batch: iter_time=8.636e-05, forward_time=0.428, loss_ctc=51.439, loss_att=50.582, acc=0.740, loss=50.839, backward_time=0.345, grad_norm=36.606, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.861e-04, train_time=1.925 +[gpub011:0/16] 2024-02-06 01:56:06,360 (trainer:762) INFO: 29epoch:train:13301-13400batch: iter_time=8.324e-05, forward_time=0.291, loss_ctc=54.660, loss_att=50.719, acc=0.754, loss=51.901, backward_time=0.298, grad_norm=44.924, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.861e-04, train_time=1.300 +[gpub011:0/16] 2024-02-06 01:58:31,537 (trainer:762) INFO: 
29epoch:train:13401-13500batch: iter_time=7.866e-05, forward_time=0.288, loss_ctc=45.494, loss_att=41.469, acc=0.755, loss=42.676, backward_time=0.294, grad_norm=39.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.860e-04, train_time=1.452 +[gpub011:0/16] 2024-02-06 02:01:01,297 (trainer:762) INFO: 29epoch:train:13501-13600batch: iter_time=8.278e-05, forward_time=0.326, loss_ctc=46.540, loss_att=44.618, acc=0.763, loss=45.194, backward_time=0.307, grad_norm=37.374, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.860e-04, train_time=1.497 +[gpub011:0/16] 2024-02-06 02:03:26,659 (trainer:762) INFO: 29epoch:train:13601-13700batch: iter_time=7.638e-05, forward_time=0.390, loss_ctc=47.801, loss_att=48.068, acc=0.750, loss=47.988, backward_time=0.348, grad_norm=44.984, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.860e-04, train_time=1.453 +[gpub011:0/16] 2024-02-06 02:05:13,868 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-06 02:05:33,282 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 02:05:36,885 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 02:05:36,885 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-06 02:05:36,929 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 02:12:43,607 (trainer:762) INFO: 29epoch:train:13701-13800batch: iter_time=4.043, forward_time=0.297, loss_ctc=41.142, loss_att=43.036, acc=0.758, loss=42.468, backward_time=0.292, grad_norm=35.492, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.860e-04, train_time=5.569 +[gpub011:0/16] 2024-02-06 02:15:10,284 (trainer:762) INFO: 29epoch:train:13801-13900batch: iter_time=8.731e-05, forward_time=0.427, loss_ctc=43.957, loss_att=41.892, acc=0.765, loss=42.511, backward_time=0.313, grad_norm=41.891, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.860e-04, train_time=1.467 +[gpub011:0/16] 2024-02-06 02:17:44,990 (trainer:762) INFO: 29epoch:train:13901-14000batch: iter_time=8.623e-05, forward_time=0.293, loss_ctc=52.353, loss_att=51.111, acc=0.756, loss=51.484, backward_time=0.298, grad_norm=44.408, clip=100.000, loss_scale=8.412e+33, optim_step_time=0.094, optim0_lr0=1.859e-04, train_time=1.546 +[gpub011:0/16] 2024-02-06 02:19:55,297 
(trainer:762) INFO: 29epoch:train:14001-14100batch: iter_time=8.838e-05, forward_time=0.307, loss_ctc=45.706, loss_att=42.734, acc=0.763, loss=43.626, backward_time=0.310, grad_norm=39.000, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.859e-04, train_time=1.304 +[gpub011:0/16] 2024-02-06 02:22:31,093 (trainer:762) INFO: 29epoch:train:14101-14200batch: iter_time=8.907e-05, forward_time=0.381, loss_ctc=54.352, loss_att=53.144, acc=0.728, loss=53.507, backward_time=0.315, grad_norm=47.041, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.859e-04, train_time=1.558 +[gpub011:0/16] 2024-02-06 02:24:44,767 (trainer:762) INFO: 29epoch:train:14201-14300batch: iter_time=9.095e-05, forward_time=0.295, loss_ctc=37.283, loss_att=32.653, acc=0.775, loss=34.042, backward_time=0.293, grad_norm=36.833, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.859e-04, train_time=1.337 +[gpub011:0/16] 2024-02-06 02:27:16,986 (trainer:762) INFO: 29epoch:train:14301-14400batch: iter_time=9.149e-05, forward_time=0.293, loss_ctc=53.340, loss_att=47.932, acc=0.758, loss=49.555, backward_time=0.297, grad_norm=39.560, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.859e-04, train_time=1.521 +[gpub011:0/16] 2024-02-06 02:29:27,981 (trainer:762) INFO: 29epoch:train:14401-14500batch: iter_time=0.001, forward_time=0.392, loss_ctc=46.538, loss_att=47.833, acc=0.736, loss=47.444, backward_time=0.350, grad_norm=35.876, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.114, optim0_lr0=1.858e-04, train_time=1.309 +[gpub011:0/16] 2024-02-06 02:32:10,615 (trainer:762) INFO: 29epoch:train:14501-14600batch: iter_time=9.340e-05, forward_time=0.296, loss_ctc=54.131, loss_att=46.935, acc=0.758, loss=49.094, backward_time=0.294, grad_norm=40.751, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.858e-04, train_time=1.628 +[gpub011:0/16] 2024-02-06 02:34:26,694 (trainer:762) INFO: 
29epoch:train:14601-14700batch: iter_time=5.988e-04, forward_time=0.318, loss_ctc=52.778, loss_att=52.475, acc=0.743, loss=52.566, backward_time=0.296, grad_norm=43.272, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.858e-04, train_time=1.361 +[gpub011:0/16] 2024-02-06 02:36:58,572 (trainer:762) INFO: 29epoch:train:14701-14800batch: iter_time=3.714e-04, forward_time=0.375, loss_ctc=43.724, loss_att=42.991, acc=0.761, loss=43.211, backward_time=0.369, grad_norm=36.927, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.858e-04, train_time=1.517 +[gpub011:0/16] 2024-02-06 02:39:30,466 (trainer:762) INFO: 29epoch:train:14801-14900batch: iter_time=8.441e-05, forward_time=0.298, loss_ctc=49.671, loss_att=48.055, acc=0.758, loss=48.540, backward_time=0.302, grad_norm=44.908, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.857e-04, train_time=1.520 +[gpub011:0/16] 2024-02-06 02:42:01,265 (trainer:762) INFO: 29epoch:train:14901-15000batch: iter_time=9.147e-05, forward_time=0.311, loss_ctc=41.421, loss_att=45.267, acc=0.736, loss=44.113, backward_time=0.295, grad_norm=35.526, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.857e-04, train_time=1.508 +[gpub011:0/16] 2024-02-06 03:21:57,339 (trainer:361) INFO: 29epoch results: [train] iter_time=0.288, forward_time=0.343, loss_ctc=48.829, loss_att=46.758, acc=0.754, loss=47.379, backward_time=0.316, grad_norm=41.335, clip=100.000, loss_scale=5.354e+33, optim_step_time=0.098, optim0_lr0=1.873e-04, train_time=1.742, time=7 hours, 15 minutes and 57.84 seconds, total_count=465000, gpu_max_cached_mem_GB=40.000, [valid] loss_ctc=37.023, cer_ctc=0.190, loss_att=39.435, acc=0.674, cer=0.340, wer=0.994, loss=38.711, time=39 minutes and 31.62 seconds, total_count=144801, gpu_max_cached_mem_GB=40.000 +[gpub011:0/16] 2024-02-06 03:22:19,309 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub011:0/16] 2024-02-06 
03:22:19,420 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/23epoch.pth, exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/24epoch.pth +[gpub011:0/16] 2024-02-06 03:22:19,420 (trainer:290) INFO: 30/45epoch started. Estimated time to finish: 5 days, 6 hours and 53 minutes +[gpub011:0/16] 2024-02-06 03:22:19,430 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-06 03:22:37,796 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 03:22:41,174 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 03:22:41,174 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-06 03:22:41,177 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 03:30:29,892 (trainer:762) INFO: 30epoch:train:1-100batch: iter_time=3.552, forward_time=0.304, loss_ctc=47.900, loss_att=44.820, acc=0.762, loss=45.744, backward_time=0.307, grad_norm=36.143, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.857e-04, train_time=4.904 +[gpub011:0/16] 2024-02-06 03:32:33,977 (trainer:762) INFO: 30epoch:train:101-200batch: iter_time=6.909e-04, forward_time=0.379, loss_ctc=50.111, loss_att=50.083, acc=0.762, loss=50.091, 
backward_time=0.336, grad_norm=45.272, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.857e-04, train_time=1.241 +[gpub011:0/16] 2024-02-06 03:33:50,973 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 03:34:40,753 (trainer:762) INFO: 30epoch:train:201-300batch: iter_time=8.330e-05, forward_time=0.289, loss_ctc=48.536, loss_att=42.408, acc=0.745, loss=44.247, backward_time=0.301, grad_norm=41.241, clip=100.000, loss_scale=8.549e+33, optim_step_time=0.095, optim0_lr0=1.857e-04, train_time=1.268 +[gpub011:0/16] 2024-02-06 03:37:17,846 (trainer:762) INFO: 30epoch:train:301-400batch: iter_time=8.105e-05, forward_time=0.314, loss_ctc=43.371, loss_att=41.672, acc=0.754, loss=42.182, backward_time=0.312, grad_norm=40.092, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.856e-04, train_time=1.570 +[gpub011:0/16] 2024-02-06 03:39:15,995 (trainer:762) INFO: 30epoch:train:401-500batch: iter_time=8.173e-05, forward_time=0.322, loss_ctc=46.125, loss_att=46.222, acc=0.760, loss=46.193, backward_time=0.310, grad_norm=37.703, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.856e-04, train_time=1.182 +[gpub011:0/16] 2024-02-06 03:41:43,945 (trainer:762) INFO: 30epoch:train:501-600batch: iter_time=8.725e-05, forward_time=0.351, loss_ctc=52.627, loss_att=44.209, acc=0.757, loss=46.735, backward_time=0.322, grad_norm=39.650, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.856e-04, train_time=1.479 +[gpub011:0/16] 2024-02-06 03:44:10,651 (trainer:762) INFO: 30epoch:train:601-700batch: iter_time=7.803e-05, forward_time=0.303, loss_ctc=54.151, loss_att=45.805, acc=0.740, loss=48.309, backward_time=0.305, grad_norm=43.959, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.856e-04, train_time=1.467 +[gpub011:0/16] 2024-02-06 03:46:15,078 (trainer:762) INFO: 30epoch:train:701-800batch: iter_time=7.970e-05, forward_time=0.304, 
loss_ctc=50.272, loss_att=47.992, acc=0.747, loss=48.676, backward_time=0.330, grad_norm=43.690, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.856e-04, train_time=1.243 +[gpub011:0/16] 2024-02-06 03:48:53,195 (trainer:762) INFO: 30epoch:train:801-900batch: iter_time=7.938e-05, forward_time=0.335, loss_ctc=46.237, loss_att=44.018, acc=0.767, loss=44.684, backward_time=0.304, grad_norm=36.753, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.855e-04, train_time=1.582 +[gpub011:0/16] 2024-02-06 03:51:05,833 (trainer:762) INFO: 30epoch:train:901-1000batch: iter_time=6.965e-04, forward_time=0.354, loss_ctc=55.861, loss_att=53.451, acc=0.740, loss=54.174, backward_time=0.298, grad_norm=46.198, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.855e-04, train_time=1.326 +[gpub011:0/16] 2024-02-06 03:53:16,994 (trainer:762) INFO: 30epoch:train:1001-1100batch: iter_time=2.655e-04, forward_time=0.312, loss_ctc=48.242, loss_att=46.487, acc=0.732, loss=47.014, backward_time=0.327, grad_norm=40.416, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.855e-04, train_time=1.311 +[gpub011:0/16] 2024-02-06 03:55:49,639 (trainer:762) INFO: 30epoch:train:1101-1200batch: iter_time=8.136e-05, forward_time=0.302, loss_ctc=47.946, loss_att=41.903, acc=0.759, loss=43.716, backward_time=0.305, grad_norm=39.704, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.855e-04, train_time=1.527 +[gpub011:0/16] 2024-02-06 03:57:14,485 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-06 03:57:33,523 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 03:57:37,052 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 03:57:37,052 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-06 03:57:37,056 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 04:04:02,157 (trainer:762) INFO: 30epoch:train:1201-1300batch: iter_time=3.621, forward_time=0.369, loss_ctc=48.698, loss_att=46.466, acc=0.772, loss=47.136, backward_time=0.311, grad_norm=39.795, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.854e-04, train_time=4.925 +[gpub011:0/16] 2024-02-06 04:06:40,060 (trainer:762) INFO: 30epoch:train:1301-1400batch: iter_time=8.304e-05, forward_time=0.307, loss_ctc=46.731, loss_att=48.962, acc=0.763, loss=48.293, backward_time=0.313, grad_norm=42.061, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.854e-04, train_time=1.578 +[gpub011:0/16] 2024-02-06 04:09:03,002 (trainer:762) INFO: 30epoch:train:1401-1500batch: iter_time=8.244e-05, forward_time=0.300, loss_ctc=49.943, loss_att=47.846, acc=0.757, loss=48.475, backward_time=0.297, grad_norm=38.569, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.854e-04, train_time=1.430 +[gpub011:0/16] 2024-02-06 04:11:33,647 (trainer:762) INFO: 
30epoch:train:1501-1600batch: iter_time=8.457e-05, forward_time=0.380, loss_ctc=43.511, loss_att=40.217, acc=0.766, loss=41.205, backward_time=0.326, grad_norm=38.262, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.854e-04, train_time=1.506 +[gpub011:0/16] 2024-02-06 04:13:59,277 (trainer:762) INFO: 30epoch:train:1601-1700batch: iter_time=8.785e-05, forward_time=0.293, loss_ctc=45.448, loss_att=46.030, acc=0.746, loss=45.856, backward_time=0.298, grad_norm=39.171, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.854e-04, train_time=1.456 +[gpub011:0/16] 2024-02-06 04:16:29,032 (trainer:762) INFO: 30epoch:train:1701-1800batch: iter_time=8.051e-05, forward_time=0.317, loss_ctc=48.988, loss_att=46.460, acc=0.771, loss=47.218, backward_time=0.297, grad_norm=38.233, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.853e-04, train_time=1.497 +[gpub011:0/16] 2024-02-06 04:18:56,612 (trainer:762) INFO: 30epoch:train:1801-1900batch: iter_time=8.374e-05, forward_time=0.356, loss_ctc=50.387, loss_att=43.007, acc=0.760, loss=45.221, backward_time=0.326, grad_norm=38.502, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.853e-04, train_time=1.476 +[gpub011:0/16] 2024-02-06 04:21:13,300 (trainer:762) INFO: 30epoch:train:1901-2000batch: iter_time=8.099e-05, forward_time=0.297, loss_ctc=51.354, loss_att=48.053, acc=0.746, loss=49.043, backward_time=0.298, grad_norm=42.991, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.853e-04, train_time=1.367 +[gpub011:0/16] 2024-02-06 04:23:41,318 (trainer:762) INFO: 30epoch:train:2001-2100batch: iter_time=8.556e-05, forward_time=0.316, loss_ctc=46.251, loss_att=44.548, acc=0.768, loss=45.059, backward_time=0.302, grad_norm=40.661, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.853e-04, train_time=1.480 +[gpub011:0/16] 2024-02-06 04:26:04,663 (trainer:762) INFO: 30epoch:train:2101-2200batch: 
iter_time=8.121e-05, forward_time=0.307, loss_ctc=55.454, loss_att=53.898, acc=0.750, loss=54.365, backward_time=0.301, grad_norm=43.921, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.853e-04, train_time=1.433 +[gpub011:0/16] 2024-02-06 04:28:41,612 (trainer:762) INFO: 30epoch:train:2201-2300batch: iter_time=8.324e-05, forward_time=0.354, loss_ctc=49.425, loss_att=44.621, acc=0.753, loss=46.063, backward_time=0.316, grad_norm=36.699, clip=100.000, loss_scale=7.010e+33, optim_step_time=0.097, optim0_lr0=1.852e-04, train_time=1.570 +[gpub011:0/16] 2024-02-06 04:30:57,848 (trainer:762) INFO: 30epoch:train:2301-2400batch: iter_time=8.291e-05, forward_time=0.293, loss_ctc=48.326, loss_att=45.804, acc=0.769, loss=46.561, backward_time=0.301, grad_norm=38.951, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.852e-04, train_time=1.362 +[gpub011:0/16] 2024-02-06 04:33:25,356 (trainer:762) INFO: 30epoch:train:2401-2500batch: iter_time=8.178e-05, forward_time=0.301, loss_ctc=47.887, loss_att=43.046, acc=0.767, loss=44.498, backward_time=0.304, grad_norm=39.469, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.852e-04, train_time=1.475 +[gpub011:0/16] 2024-02-06 04:33:45,407 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-06 04:34:04,506 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 04:34:08,388 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 04:34:08,388 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-06 04:34:08,392 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 04:41:42,097 (trainer:762) INFO: 30epoch:train:2501-2600batch: iter_time=3.611, forward_time=0.379, loss_ctc=47.022, loss_att=43.748, acc=0.770, loss=44.730, backward_time=0.319, grad_norm=34.220, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.852e-04, train_time=4.967 +[gpub011:0/16] 2024-02-06 04:43:55,885 (trainer:762) INFO: 30epoch:train:2601-2700batch: iter_time=8.816e-05, forward_time=0.312, loss_ctc=49.728, loss_att=51.461, acc=0.769, loss=50.941, backward_time=0.299, grad_norm=42.490, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.852e-04, train_time=1.338 +[gpub011:0/16] 2024-02-06 04:46:13,231 (trainer:762) INFO: 30epoch:train:2701-2800batch: iter_time=7.945e-05, forward_time=0.346, loss_ctc=47.270, loss_att=41.458, acc=0.760, loss=43.202, backward_time=0.352, grad_norm=39.023, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.851e-04, train_time=1.373 +[gpub011:0/16] 2024-02-06 04:48:12,143 (trainer:762) 
INFO: 30epoch:train:2801-2900batch: iter_time=8.003e-05, forward_time=0.308, loss_ctc=42.548, loss_att=41.529, acc=0.760, loss=41.835, backward_time=0.298, grad_norm=37.299, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.851e-04, train_time=1.189 +[gpub011:0/16] 2024-02-06 04:50:37,538 (trainer:762) INFO: 30epoch:train:2901-3000batch: iter_time=8.775e-05, forward_time=0.346, loss_ctc=45.308, loss_att=46.581, acc=0.765, loss=46.199, backward_time=0.340, grad_norm=35.820, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=1.851e-04, train_time=1.454 +[gpub011:0/16] 2024-02-06 04:53:00,435 (trainer:762) INFO: 30epoch:train:3001-3100batch: iter_time=8.616e-05, forward_time=0.313, loss_ctc=51.213, loss_att=44.623, acc=0.765, loss=46.600, backward_time=0.300, grad_norm=40.445, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.851e-04, train_time=1.429 +[gpub011:0/16] 2024-02-06 04:55:05,328 (trainer:762) INFO: 30epoch:train:3101-3200batch: iter_time=1.326e-04, forward_time=0.371, loss_ctc=51.895, loss_att=44.723, acc=0.747, loss=46.875, backward_time=0.340, grad_norm=40.147, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=1.850e-04, train_time=1.248 +[gpub011:0/16] 2024-02-06 04:57:25,396 (trainer:762) INFO: 30epoch:train:3201-3300batch: iter_time=9.318e-05, forward_time=0.307, loss_ctc=49.028, loss_att=47.253, acc=0.761, loss=47.786, backward_time=0.313, grad_norm=43.026, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.850e-04, train_time=1.401 +[gpub011:0/16] 2024-02-06 04:59:53,715 (trainer:762) INFO: 30epoch:train:3301-3400batch: iter_time=1.002e-04, forward_time=0.334, loss_ctc=45.740, loss_att=43.747, acc=0.777, loss=44.345, backward_time=0.371, grad_norm=35.324, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.850e-04, train_time=1.483 +[gpub011:0/16] 2024-02-06 05:01:52,134 (trainer:762) INFO: 30epoch:train:3401-3500batch: 
iter_time=8.765e-05, forward_time=0.316, loss_ctc=54.579, loss_att=53.163, acc=0.746, loss=53.588, backward_time=0.306, grad_norm=43.214, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.850e-04, train_time=1.183 +[gpub011:0/16] 2024-02-06 05:04:24,793 (trainer:762) INFO: 30epoch:train:3501-3600batch: iter_time=4.233e-04, forward_time=0.361, loss_ctc=47.217, loss_att=45.167, acc=0.748, loss=45.782, backward_time=0.313, grad_norm=38.763, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.850e-04, train_time=1.527 +[gpub011:0/16] 2024-02-06 05:06:29,728 (trainer:762) INFO: 30epoch:train:3601-3700batch: iter_time=8.908e-05, forward_time=0.301, loss_ctc=47.286, loss_att=41.934, acc=0.770, loss=43.540, backward_time=0.319, grad_norm=38.782, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.849e-04, train_time=1.249 +[gpub011:0/16] 2024-02-06 05:07:58,977 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub011:0/16] 2024-02-06 05:08:18,089 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 05:08:21,655 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 05:08:21,655 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-06 05:08:21,659 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 05:14:24,749 
(trainer:762) INFO: 30epoch:train:3701-3800batch: iter_time=3.475, forward_time=0.304, loss_ctc=47.287, loss_att=44.810, acc=0.777, loss=45.553, backward_time=0.309, grad_norm=38.203, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.849e-04, train_time=4.750 +[gpub011:0/16] 2024-02-06 05:16:53,834 (trainer:762) INFO: 30epoch:train:3801-3900batch: iter_time=0.003, forward_time=0.392, loss_ctc=45.582, loss_att=48.074, acc=0.766, loss=47.326, backward_time=0.313, grad_norm=39.479, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.849e-04, train_time=1.490 +[gpub011:0/16] 2024-02-06 05:18:57,084 (trainer:762) INFO: 30epoch:train:3901-4000batch: iter_time=8.386e-05, forward_time=0.291, loss_ctc=49.291, loss_att=45.983, acc=0.763, loss=46.975, backward_time=0.297, grad_norm=38.866, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.849e-04, train_time=1.233 +[gpub011:0/16] 2024-02-06 05:20:54,435 (trainer:762) INFO: 30epoch:train:4001-4100batch: iter_time=8.271e-05, forward_time=0.314, loss_ctc=43.158, loss_att=39.066, acc=0.771, loss=40.294, backward_time=0.298, grad_norm=37.386, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.849e-04, train_time=1.172 +[gpub011:0/16] 2024-02-06 05:23:02,497 (trainer:762) INFO: 30epoch:train:4101-4200batch: iter_time=8.424e-05, forward_time=0.290, loss_ctc=44.562, loss_att=44.496, acc=0.752, loss=44.516, backward_time=0.296, grad_norm=38.078, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.848e-04, train_time=1.282 +[gpub011:0/16] 2024-02-06 05:25:29,303 (trainer:762) INFO: 30epoch:train:4201-4300batch: iter_time=8.887e-05, forward_time=0.353, loss_ctc=47.972, loss_att=45.593, acc=0.774, loss=46.307, backward_time=0.330, grad_norm=37.077, clip=100.000, loss_scale=1.402e+34, optim_step_time=0.099, optim0_lr0=1.848e-04, train_time=1.467 +[gpub011:0/16] 2024-02-06 05:26:53,809 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub011:0/16] 2024-02-06 05:27:36,016 (trainer:762) INFO: 30epoch:train:4301-4400batch: iter_time=8.560e-05, forward_time=0.320, loss_ctc=49.496, loss_att=42.256, acc=0.764, loss=44.428, backward_time=0.297, grad_norm=38.372, clip=100.000, loss_scale=1.752e+34, optim_step_time=0.094, optim0_lr0=1.848e-04, train_time=1.267 +[gpub011:0/16] 2024-02-06 05:29:39,644 (trainer:762) INFO: 30epoch:train:4401-4500batch: iter_time=8.084e-05, forward_time=0.314, loss_ctc=50.723, loss_att=47.036, acc=0.749, loss=48.142, backward_time=0.299, grad_norm=42.097, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.848e-04, train_time=1.236 +[gpub011:0/16] 2024-02-06 05:31:53,775 (trainer:762) INFO: 30epoch:train:4501-4600batch: iter_time=9.170e-05, forward_time=0.289, loss_ctc=45.255, loss_att=43.655, acc=0.770, loss=44.135, backward_time=0.296, grad_norm=39.824, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.848e-04, train_time=1.342 +[gpub011:0/16] 2024-02-06 05:34:26,832 (trainer:762) INFO: 30epoch:train:4601-4700batch: iter_time=5.360e-04, forward_time=0.367, loss_ctc=54.500, loss_att=52.683, acc=0.755, loss=53.228, backward_time=0.400, grad_norm=42.483, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=1.847e-04, train_time=1.530 +[gpub011:0/16] 2024-02-06 05:36:26,659 (trainer:762) INFO: 30epoch:train:4701-4800batch: iter_time=8.455e-05, forward_time=0.288, loss_ctc=48.396, loss_att=44.058, acc=0.756, loss=45.360, backward_time=0.296, grad_norm=37.509, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.847e-04, train_time=1.198 +[gpub011:0/16] 2024-02-06 05:38:42,450 (trainer:762) INFO: 30epoch:train:4801-4900batch: iter_time=8.610e-05, forward_time=0.291, loss_ctc=47.938, loss_att=45.760, acc=0.770, loss=46.413, backward_time=0.296, grad_norm=40.408, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.847e-04, train_time=1.358 
+[gpub011:0/16] 2024-02-06 05:41:09,267 (trainer:762) INFO: 30epoch:train:4901-5000batch: iter_time=8.393e-05, forward_time=0.409, loss_ctc=47.015, loss_att=42.618, acc=0.767, loss=43.937, backward_time=0.317, grad_norm=38.375, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.847e-04, train_time=1.468 +[gpub011:0/16] 2024-02-06 05:41:29,317 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub011:0/16] 2024-02-06 05:41:48,317 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 05:41:51,876 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 05:41:51,876 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-06 05:41:51,880 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 05:49:03,404 (trainer:762) INFO: 30epoch:train:5001-5100batch: iter_time=3.518, forward_time=0.311, loss_ctc=46.815, loss_att=44.464, acc=0.769, loss=45.169, backward_time=0.299, grad_norm=35.391, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.846e-04, train_time=4.741 +[gpub011:0/16] 2024-02-06 05:51:09,850 (trainer:762) INFO: 30epoch:train:5101-5200batch: iter_time=7.940e-05, forward_time=0.292, loss_ctc=49.609, loss_att=50.790, acc=0.773, loss=50.436, backward_time=0.299, grad_norm=42.424, clip=100.000, loss_scale=1.038e+34, 
optim_step_time=0.094, optim0_lr0=1.846e-04, train_time=1.264 +[gpub011:0/16] 2024-02-06 05:53:42,488 (trainer:762) INFO: 30epoch:train:5201-5300batch: iter_time=8.082e-05, forward_time=0.337, loss_ctc=46.726, loss_att=40.873, acc=0.764, loss=42.629, backward_time=0.386, grad_norm=37.523, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.846e-04, train_time=1.525 +[gpub011:0/16] 2024-02-06 05:55:47,527 (trainer:762) INFO: 30epoch:train:5301-5400batch: iter_time=7.890e-05, forward_time=0.331, loss_ctc=42.231, loss_att=41.341, acc=0.762, loss=41.608, backward_time=0.304, grad_norm=37.323, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.846e-04, train_time=1.250 +[gpub011:0/16] 2024-02-06 05:58:06,900 (trainer:762) INFO: 30epoch:train:5401-5500batch: iter_time=8.995e-05, forward_time=0.292, loss_ctc=45.082, loss_att=45.623, acc=0.768, loss=45.461, backward_time=0.296, grad_norm=36.926, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.846e-04, train_time=1.394 +[gpub011:0/16] 2024-02-06 06:00:39,373 (trainer:762) INFO: 30epoch:train:5501-5600batch: iter_time=0.002, forward_time=0.392, loss_ctc=51.128, loss_att=43.972, acc=0.768, loss=46.119, backward_time=0.319, grad_norm=38.158, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.845e-04, train_time=1.524 +[gpub011:0/16] 2024-02-06 06:02:45,603 (trainer:762) INFO: 30epoch:train:5601-5700batch: iter_time=7.760e-05, forward_time=0.319, loss_ctc=51.363, loss_att=44.446, acc=0.749, loss=46.521, backward_time=0.304, grad_norm=41.394, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.845e-04, train_time=1.262 +[gpub011:0/16] 2024-02-06 06:04:56,777 (trainer:762) INFO: 30epoch:train:5701-5800batch: iter_time=7.758e-05, forward_time=0.291, loss_ctc=48.743, loss_att=47.198, acc=0.762, loss=47.662, backward_time=0.297, grad_norm=42.256, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.845e-04, 
train_time=1.311 +[gpub011:0/16] 2024-02-06 06:07:18,962 (trainer:762) INFO: 30epoch:train:5801-5900batch: iter_time=8.059e-05, forward_time=0.292, loss_ctc=45.828, loss_att=43.395, acc=0.779, loss=44.125, backward_time=0.297, grad_norm=35.088, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.845e-04, train_time=1.422 +[gpub011:0/16] 2024-02-06 06:09:25,837 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 06:09:38,130 (trainer:762) INFO: 30epoch:train:5901-6000batch: iter_time=7.894e-05, forward_time=0.361, loss_ctc=53.560, loss_att=52.367, acc=0.749, loss=52.725, backward_time=0.389, grad_norm=42.811, clip=100.000, loss_scale=9.808e+33, optim_step_time=0.099, optim0_lr0=1.845e-04, train_time=1.391 +[gpub011:0/16] 2024-02-06 06:11:49,602 (trainer:762) INFO: 30epoch:train:6001-6100batch: iter_time=8.022e-05, forward_time=0.291, loss_ctc=46.312, loss_att=45.501, acc=0.750, loss=45.744, backward_time=0.296, grad_norm=38.017, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.844e-04, train_time=1.314 +[gpub011:0/16] 2024-02-06 06:14:08,493 (trainer:762) INFO: 30epoch:train:6101-6200batch: iter_time=7.944e-05, forward_time=0.290, loss_ctc=47.036, loss_att=42.083, acc=0.770, loss=43.569, backward_time=0.295, grad_norm=37.959, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.844e-04, train_time=1.388 +[gpub011:0/16] 2024-02-06 06:15:31,270 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub011:0/16] 2024-02-06 06:15:50,605 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 06:15:54,238 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 06:15:54,238 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-06 06:15:54,242 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 06:22:01,342 (trainer:762) INFO: 30epoch:train:6201-6300batch: iter_time=3.463, forward_time=0.362, loss_ctc=47.976, loss_att=46.325, acc=0.771, loss=46.820, backward_time=0.307, grad_norm=37.466, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.844e-04, train_time=4.728 +[gpub011:0/16] 2024-02-06 06:24:08,220 (trainer:762) INFO: 30epoch:train:6301-6400batch: iter_time=7.374e-05, forward_time=0.289, loss_ctc=45.372, loss_att=48.312, acc=0.766, loss=47.430, backward_time=0.294, grad_norm=41.245, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.844e-04, train_time=1.269 +[gpub011:0/16] 2024-02-06 06:26:32,495 (trainer:762) INFO: 30epoch:train:6401-6500batch: iter_time=8.057e-05, forward_time=0.414, loss_ctc=49.318, loss_att=45.850, acc=0.751, loss=46.891, backward_time=0.320, grad_norm=39.212, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.844e-04, train_time=1.443 +[gpub011:0/16] 2024-02-06 06:27:19,765 (trainer:693) 
WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 06:28:48,401 (trainer:762) INFO: 30epoch:train:6501-6600batch: iter_time=8.401e-05, forward_time=0.287, loss_ctc=42.842, loss_att=39.528, acc=0.769, loss=40.522, backward_time=0.292, grad_norm=36.313, clip=100.000, loss_scale=3.252e+33, optim_step_time=0.094, optim0_lr0=1.843e-04, train_time=1.359 +[gpub011:0/16] 2024-02-06 06:31:23,289 (trainer:762) INFO: 30epoch:train:6601-6700batch: iter_time=8.663e-05, forward_time=0.385, loss_ctc=44.058, loss_att=44.862, acc=0.747, loss=44.621, backward_time=0.361, grad_norm=37.296, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.843e-04, train_time=1.548 +[gpub011:0/16] 2024-02-06 06:33:37,740 (trainer:762) INFO: 30epoch:train:6701-6800batch: iter_time=8.313e-05, forward_time=0.290, loss_ctc=47.450, loss_att=45.622, acc=0.772, loss=46.170, backward_time=0.295, grad_norm=36.553, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.843e-04, train_time=1.345 +[gpub011:0/16] 2024-02-06 06:36:00,995 (trainer:762) INFO: 30epoch:train:6801-6900batch: iter_time=8.067e-05, forward_time=0.358, loss_ctc=49.096, loss_att=41.643, acc=0.762, loss=43.879, backward_time=0.364, grad_norm=40.065, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=1.843e-04, train_time=1.432 +[gpub011:0/16] 2024-02-06 06:38:32,198 (trainer:762) INFO: 30epoch:train:6901-7000batch: iter_time=8.110e-05, forward_time=0.292, loss_ctc=50.163, loss_att=46.821, acc=0.748, loss=47.824, backward_time=0.296, grad_norm=42.244, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.842e-04, train_time=1.513 +[gpub011:0/16] 2024-02-06 06:40:41,672 (trainer:762) INFO: 30epoch:train:7001-7100batch: iter_time=7.927e-05, forward_time=0.290, loss_ctc=45.019, loss_att=44.279, acc=0.761, loss=44.501, backward_time=0.296, grad_norm=41.066, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.842e-04, 
train_time=1.295 +[gpub011:0/16] 2024-02-06 06:43:11,960 (trainer:762) INFO: 30epoch:train:7101-7200batch: iter_time=7.954e-05, forward_time=0.456, loss_ctc=53.687, loss_att=53.109, acc=0.749, loss=53.282, backward_time=0.338, grad_norm=43.231, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.842e-04, train_time=1.502 +[gpub011:0/16] 2024-02-06 06:45:29,770 (trainer:762) INFO: 30epoch:train:7201-7300batch: iter_time=8.008e-05, forward_time=0.290, loss_ctc=47.654, loss_att=43.630, acc=0.754, loss=44.838, backward_time=0.295, grad_norm=37.946, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.842e-04, train_time=1.379 +[gpub011:0/16] 2024-02-06 06:47:34,352 (trainer:762) INFO: 30epoch:train:7301-7400batch: iter_time=7.999e-05, forward_time=0.290, loss_ctc=47.484, loss_att=44.755, acc=0.765, loss=45.574, backward_time=0.296, grad_norm=40.424, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.842e-04, train_time=1.246 +[gpub011:0/16] 2024-02-06 06:50:31,946 (trainer:762) INFO: 30epoch:train:7401-7500batch: iter_time=3.282e-04, forward_time=0.384, loss_ctc=46.568, loss_att=42.465, acc=0.764, loss=43.696, backward_time=0.363, grad_norm=40.481, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.841e-04, train_time=1.775 +[gpub011:0/16] 2024-02-06 06:50:52,106 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-06 06:51:11,179 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 06:51:14,743 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 06:51:14,744 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-06 06:51:14,748 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 06:58:39,759 (trainer:762) INFO: 30epoch:train:7501-7600batch: iter_time=3.476, forward_time=0.370, loss_ctc=46.589, loss_att=44.498, acc=0.772, loss=45.125, backward_time=0.395, grad_norm=35.382, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.841e-04, train_time=4.879 +[gpub011:0/16] 2024-02-06 07:01:02,572 (trainer:762) INFO: 30epoch:train:7601-7700batch: iter_time=8.284e-05, forward_time=0.290, loss_ctc=49.158, loss_att=51.585, acc=0.771, loss=50.857, backward_time=0.298, grad_norm=43.439, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.841e-04, train_time=1.426 +[gpub011:0/16] 2024-02-06 07:03:35,679 (trainer:762) INFO: 30epoch:train:7701-7800batch: iter_time=3.135e-04, forward_time=0.444, loss_ctc=46.215, loss_att=40.907, acc=0.765, loss=42.499, backward_time=0.334, grad_norm=38.873, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.841e-04, train_time=1.532 +[gpub011:0/16] 2024-02-06 07:06:12,227 (trainer:762) INFO: 
30epoch:train:7801-7900batch: iter_time=4.035e-04, forward_time=0.389, loss_ctc=41.602, loss_att=41.173, acc=0.765, loss=41.302, backward_time=0.350, grad_norm=38.249, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.841e-04, train_time=1.565 +[gpub011:0/16] 2024-02-06 07:08:38,277 (trainer:762) INFO: 30epoch:train:7901-8000batch: iter_time=8.993e-05, forward_time=0.288, loss_ctc=44.474, loss_att=45.906, acc=0.768, loss=45.477, backward_time=0.295, grad_norm=36.100, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.840e-04, train_time=1.460 +[gpub011:0/16] 2024-02-06 07:11:10,973 (trainer:762) INFO: 30epoch:train:8001-8100batch: iter_time=5.236e-04, forward_time=0.396, loss_ctc=50.307, loss_att=44.133, acc=0.769, loss=45.985, backward_time=0.324, grad_norm=38.655, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.112, optim0_lr0=1.840e-04, train_time=1.526 +[gpub011:0/16] 2024-02-06 07:13:14,346 (trainer:762) INFO: 30epoch:train:8101-8200batch: iter_time=9.059e-05, forward_time=0.290, loss_ctc=50.529, loss_att=43.751, acc=0.751, loss=45.784, backward_time=0.296, grad_norm=41.772, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.840e-04, train_time=1.234 +[gpub011:0/16] 2024-02-06 07:15:56,149 (trainer:762) INFO: 30epoch:train:8201-8300batch: iter_time=8.766e-05, forward_time=0.356, loss_ctc=48.242, loss_att=47.217, acc=0.762, loss=47.525, backward_time=0.322, grad_norm=43.699, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=1.840e-04, train_time=1.618 +[gpub011:0/16] 2024-02-06 07:18:10,042 (trainer:762) INFO: 30epoch:train:8301-8400batch: iter_time=8.457e-05, forward_time=0.290, loss_ctc=45.136, loss_att=43.052, acc=0.779, loss=43.677, backward_time=0.298, grad_norm=37.163, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.840e-04, train_time=1.338 +[gpub011:0/16] 2024-02-06 07:20:18,289 (trainer:762) INFO: 30epoch:train:8401-8500batch: 
iter_time=8.746e-05, forward_time=0.363, loss_ctc=53.086, loss_att=52.120, acc=0.750, loss=52.410, backward_time=0.313, grad_norm=44.372, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.839e-04, train_time=1.283 +[gpub011:0/16] 2024-02-06 07:23:06,113 (trainer:762) INFO: 30epoch:train:8501-8600batch: iter_time=8.292e-05, forward_time=0.318, loss_ctc=46.840, loss_att=44.976, acc=0.752, loss=45.535, backward_time=0.337, grad_norm=39.109, clip=100.000, loss_scale=4.517e+33, optim_step_time=0.096, optim0_lr0=1.839e-04, train_time=1.678 +[gpub011:0/16] 2024-02-06 07:25:10,686 (trainer:762) INFO: 30epoch:train:8601-8700batch: iter_time=8.455e-05, forward_time=0.289, loss_ctc=47.223, loss_att=41.759, acc=0.771, loss=43.398, backward_time=0.297, grad_norm=38.150, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.839e-04, train_time=1.245 +[gpub011:0/16] 2024-02-06 07:26:28,271 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub011:0/16] 2024-02-06 07:26:47,650 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 07:26:51,496 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 07:26:51,496 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-06 07:26:51,499 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 07:32:39,413 
(trainer:762) INFO: 30epoch:train:8701-8800batch: iter_time=3.275, forward_time=0.353, loss_ctc=47.302, loss_att=45.571, acc=0.773, loss=46.090, backward_time=0.308, grad_norm=38.102, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.839e-04, train_time=4.487 +[gpub011:0/16] 2024-02-06 07:34:44,041 (trainer:762) INFO: 30epoch:train:8801-8900batch: iter_time=8.063e-05, forward_time=0.289, loss_ctc=45.041, loss_att=47.627, acc=0.765, loss=46.851, backward_time=0.295, grad_norm=39.607, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.839e-04, train_time=1.245 +[gpub011:0/16] 2024-02-06 07:37:19,216 (trainer:762) INFO: 30epoch:train:8901-9000batch: iter_time=7.799e-05, forward_time=0.453, loss_ctc=49.031, loss_att=45.463, acc=0.754, loss=46.533, backward_time=0.346, grad_norm=38.242, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.838e-04, train_time=1.552 +[gpub011:0/16] 2024-02-06 07:39:28,993 (trainer:762) INFO: 30epoch:train:9001-9100batch: iter_time=7.932e-05, forward_time=0.286, loss_ctc=42.405, loss_att=39.147, acc=0.772, loss=40.125, backward_time=0.294, grad_norm=39.415, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.838e-04, train_time=1.297 +[gpub011:0/16] 2024-02-06 07:42:11,795 (trainer:762) INFO: 30epoch:train:9101-9200batch: iter_time=8.100e-05, forward_time=0.361, loss_ctc=43.254, loss_att=44.172, acc=0.748, loss=43.896, backward_time=0.406, grad_norm=38.219, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.838e-04, train_time=1.628 +[gpub011:0/16] 2024-02-06 07:44:27,011 (trainer:762) INFO: 30epoch:train:9201-9300batch: iter_time=7.645e-05, forward_time=0.290, loss_ctc=47.666, loss_att=45.386, acc=0.773, loss=46.070, backward_time=0.297, grad_norm=37.564, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.838e-04, train_time=1.352 +[gpub011:0/16] 2024-02-06 07:46:32,609 (trainer:762) INFO: 
30epoch:train:9301-9400batch: iter_time=7.747e-05, forward_time=0.290, loss_ctc=49.058, loss_att=41.628, acc=0.762, loss=43.857, backward_time=0.296, grad_norm=38.055, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.838e-04, train_time=1.256 +[gpub011:0/16] 2024-02-06 07:49:17,417 (trainer:762) INFO: 30epoch:train:9401-9500batch: iter_time=7.874e-05, forward_time=0.354, loss_ctc=49.742, loss_att=46.587, acc=0.747, loss=47.533, backward_time=0.390, grad_norm=41.639, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.837e-04, train_time=1.648 +[gpub011:0/16] 2024-02-06 07:51:37,825 (trainer:762) INFO: 30epoch:train:9501-9600batch: iter_time=7.885e-05, forward_time=0.421, loss_ctc=44.914, loss_att=44.061, acc=0.763, loss=44.317, backward_time=0.343, grad_norm=42.372, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.837e-04, train_time=1.404 +[gpub011:0/16] 2024-02-06 07:53:52,553 (trainer:762) INFO: 30epoch:train:9601-9700batch: iter_time=7.728e-05, forward_time=0.292, loss_ctc=53.648, loss_att=52.689, acc=0.750, loss=52.977, backward_time=0.300, grad_norm=44.773, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.837e-04, train_time=1.347 +[gpub011:0/16] 2024-02-06 07:56:28,219 (trainer:762) INFO: 30epoch:train:9701-9800batch: iter_time=8.140e-05, forward_time=0.444, loss_ctc=47.828, loss_att=44.095, acc=0.753, loss=45.215, backward_time=0.321, grad_norm=37.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.837e-04, train_time=1.556 +[gpub011:0/16] 2024-02-06 07:58:46,830 (trainer:762) INFO: 30epoch:train:9801-9900batch: iter_time=7.915e-05, forward_time=0.311, loss_ctc=47.387, loss_att=44.478, acc=0.766, loss=45.351, backward_time=0.296, grad_norm=40.612, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.836e-04, train_time=1.386 +[gpub011:0/16] 2024-02-06 08:00:58,372 (trainer:762) INFO: 30epoch:train:9901-10000batch: 
iter_time=7.119e-05, forward_time=0.290, loss_ctc=46.607, loss_att=42.350, acc=0.766, loss=43.627, backward_time=0.296, grad_norm=40.631, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.836e-04, train_time=1.315 +[gpub011:0/16] 2024-02-06 08:01:18,401 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub011:0/16] 2024-02-06 08:01:38,003 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 08:01:41,587 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 08:01:41,587 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-06 08:01:41,660 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 08:08:52,290 (trainer:762) INFO: 30epoch:train:10001-10100batch: iter_time=3.566, forward_time=0.346, loss_ctc=46.261, loss_att=43.444, acc=0.770, loss=44.289, backward_time=0.310, grad_norm=35.098, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.836e-04, train_time=4.739 +[gpub011:0/16] 2024-02-06 08:10:47,814 (trainer:762) INFO: 30epoch:train:10101-10200batch: iter_time=8.408e-05, forward_time=0.291, loss_ctc=48.657, loss_att=49.102, acc=0.771, loss=48.968, backward_time=0.299, grad_norm=41.939, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.836e-04, train_time=1.155 +[gpub011:0/16] 2024-02-06 08:13:49,267 
(trainer:762) INFO: 30epoch:train:10201-10300batch: iter_time=1.938e-04, forward_time=0.373, loss_ctc=45.200, loss_att=40.540, acc=0.756, loss=41.938, backward_time=0.378, grad_norm=36.038, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.836e-04, train_time=1.814 +[gpub011:0/16] 2024-02-06 08:15:41,175 (trainer:762) INFO: 30epoch:train:10301-10400batch: iter_time=8.442e-05, forward_time=0.287, loss_ctc=41.593, loss_att=40.448, acc=0.762, loss=40.792, backward_time=0.295, grad_norm=37.222, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.835e-04, train_time=1.119 +[gpub011:0/16] 2024-02-06 08:18:05,936 (trainer:762) INFO: 30epoch:train:10401-10500batch: iter_time=8.111e-05, forward_time=0.369, loss_ctc=44.443, loss_att=45.209, acc=0.767, loss=44.979, backward_time=0.355, grad_norm=35.103, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.835e-04, train_time=1.447 +[gpub011:0/16] 2024-02-06 08:20:35,909 (trainer:762) INFO: 30epoch:train:10501-10600batch: iter_time=8.540e-05, forward_time=0.291, loss_ctc=50.884, loss_att=43.510, acc=0.765, loss=45.722, backward_time=0.298, grad_norm=37.051, clip=100.000, loss_scale=9.035e+33, optim_step_time=0.094, optim0_lr0=1.835e-04, train_time=1.500 +[gpub011:0/16] 2024-02-06 08:23:05,643 (trainer:762) INFO: 30epoch:train:10601-10700batch: iter_time=8.760e-05, forward_time=0.427, loss_ctc=50.061, loss_att=43.408, acc=0.749, loss=45.404, backward_time=0.344, grad_norm=40.025, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.835e-04, train_time=1.497 +[gpub011:0/16] 2024-02-06 08:25:00,148 (trainer:762) INFO: 30epoch:train:10701-10800batch: iter_time=8.627e-05, forward_time=0.290, loss_ctc=48.279, loss_att=47.027, acc=0.753, loss=47.402, backward_time=0.299, grad_norm=45.478, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.835e-04, train_time=1.145 +[gpub011:0/16] 2024-02-06 08:27:45,981 (trainer:762) INFO: 
30epoch:train:10801-10900batch: iter_time=8.102e-05, forward_time=0.428, loss_ctc=44.787, loss_att=43.048, acc=0.774, loss=43.570, backward_time=0.334, grad_norm=35.129, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.834e-04, train_time=1.658 +[gpub011:0/16] 2024-02-06 08:29:44,315 (trainer:762) INFO: 30epoch:train:10901-11000batch: iter_time=8.418e-05, forward_time=0.291, loss_ctc=52.867, loss_att=51.868, acc=0.747, loss=52.168, backward_time=0.298, grad_norm=44.692, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.834e-04, train_time=1.183 +[gpub011:0/16] 2024-02-06 08:31:56,119 (trainer:762) INFO: 30epoch:train:11001-11100batch: iter_time=8.510e-05, forward_time=0.463, loss_ctc=46.733, loss_att=45.717, acc=0.738, loss=46.022, backward_time=0.322, grad_norm=41.271, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.834e-04, train_time=1.318 +[gpub011:0/16] 2024-02-06 08:32:12,745 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 08:34:34,474 (trainer:762) INFO: 30epoch:train:11101-11200batch: iter_time=8.498e-05, forward_time=0.291, loss_ctc=46.477, loss_att=41.295, acc=0.765, loss=42.850, backward_time=0.295, grad_norm=39.574, clip=100.000, loss_scale=5.612e+33, optim_step_time=0.094, optim0_lr0=1.834e-04, train_time=1.583 +[gpub011:0/16] 2024-02-06 08:36:13,016 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-06 08:36:32,621 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 08:36:36,239 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 08:36:36,239 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-06 08:36:36,243 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 08:42:50,077 (trainer:762) INFO: 30epoch:train:11201-11300batch: iter_time=3.572, forward_time=0.456, loss_ctc=46.910, loss_att=44.825, acc=0.774, loss=45.450, backward_time=0.334, grad_norm=38.567, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.834e-04, train_time=4.956 +[gpub011:0/16] 2024-02-06 08:45:08,696 (trainer:762) INFO: 30epoch:train:11301-11400batch: iter_time=8.303e-05, forward_time=0.289, loss_ctc=44.633, loss_att=46.409, acc=0.769, loss=45.876, backward_time=0.296, grad_norm=38.664, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.833e-04, train_time=1.386 +[gpub011:0/16] 2024-02-06 08:47:09,230 (trainer:762) INFO: 30epoch:train:11401-11500batch: iter_time=8.644e-05, forward_time=0.291, loss_ctc=49.229, loss_att=44.971, acc=0.753, loss=46.248, backward_time=0.298, grad_norm=38.684, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.833e-04, train_time=1.205 +[gpub011:0/16] 2024-02-06 08:49:21,952 (trainer:762) 
INFO: 30epoch:train:11501-11600batch: iter_time=9.937e-05, forward_time=0.438, loss_ctc=42.190, loss_att=38.898, acc=0.772, loss=39.885, backward_time=0.342, grad_norm=36.044, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.833e-04, train_time=1.327 +[gpub011:0/16] 2024-02-06 08:51:52,658 (trainer:762) INFO: 30epoch:train:11601-11700batch: iter_time=8.134e-05, forward_time=0.290, loss_ctc=43.587, loss_att=43.181, acc=0.753, loss=43.303, backward_time=0.295, grad_norm=37.535, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.833e-04, train_time=1.507 +[gpub011:0/16] 2024-02-06 08:53:45,924 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 08:54:14,601 (trainer:762) INFO: 30epoch:train:11701-11800batch: iter_time=8.462e-05, forward_time=0.356, loss_ctc=47.291, loss_att=44.845, acc=0.772, loss=45.579, backward_time=0.405, grad_norm=36.746, clip=100.000, loss_scale=4.642e+33, optim_step_time=0.099, optim0_lr0=1.833e-04, train_time=1.419 +[gpub011:0/16] 2024-02-06 08:56:02,953 (trainer:762) INFO: 30epoch:train:11801-11900batch: iter_time=9.416e-05, forward_time=0.288, loss_ctc=48.590, loss_att=41.078, acc=0.762, loss=43.331, backward_time=0.297, grad_norm=39.971, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.832e-04, train_time=1.084 +[gpub011:0/16] 2024-02-06 08:58:47,707 (trainer:762) INFO: 30epoch:train:11901-12000batch: iter_time=8.421e-05, forward_time=0.318, loss_ctc=50.007, loss_att=46.376, acc=0.749, loss=47.465, backward_time=0.296, grad_norm=42.574, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.832e-04, train_time=1.647 +[gpub011:0/16] 2024-02-06 09:01:04,936 (trainer:762) INFO: 30epoch:train:12001-12100batch: iter_time=8.420e-05, forward_time=0.384, loss_ctc=44.494, loss_att=43.731, acc=0.762, loss=43.960, backward_time=0.344, grad_norm=41.412, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.108, 
optim0_lr0=1.832e-04, train_time=1.372 +[gpub011:0/16] 2024-02-06 09:03:09,155 (trainer:762) INFO: 30epoch:train:12101-12200batch: iter_time=9.161e-05, forward_time=0.292, loss_ctc=53.311, loss_att=52.262, acc=0.750, loss=52.577, backward_time=0.301, grad_norm=42.621, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.832e-04, train_time=1.243 +[gpub011:0/16] 2024-02-06 09:05:58,150 (trainer:762) INFO: 30epoch:train:12201-12300batch: iter_time=9.327e-05, forward_time=0.349, loss_ctc=47.220, loss_att=43.577, acc=0.755, loss=44.670, backward_time=0.425, grad_norm=36.672, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.832e-04, train_time=1.690 +[gpub011:0/16] 2024-02-06 09:08:02,278 (trainer:762) INFO: 30epoch:train:12301-12400batch: iter_time=9.010e-05, forward_time=0.289, loss_ctc=47.205, loss_att=44.854, acc=0.764, loss=45.559, backward_time=0.298, grad_norm=40.342, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.831e-04, train_time=1.241 +[gpub011:0/16] 2024-02-06 09:10:32,450 (trainer:762) INFO: 30epoch:train:12401-12500batch: iter_time=9.451e-05, forward_time=0.291, loss_ctc=45.825, loss_att=42.281, acc=0.765, loss=43.344, backward_time=0.296, grad_norm=39.588, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.831e-04, train_time=1.502 +[gpub011:0/16] 2024-02-06 09:10:52,478 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-06 09:11:12,188 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 09:11:15,766 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 09:11:15,766 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-06 09:11:15,794 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 09:18:50,744 (trainer:762) INFO: 30epoch:train:12501-12600batch: iter_time=3.720, forward_time=0.407, loss_ctc=46.288, loss_att=43.452, acc=0.770, loss=44.303, backward_time=0.319, grad_norm=34.888, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.831e-04, train_time=4.983 +[gpub011:0/16] 2024-02-06 09:21:16,704 (trainer:762) INFO: 30epoch:train:12601-12700batch: iter_time=8.097e-05, forward_time=0.290, loss_ctc=48.191, loss_att=47.907, acc=0.773, loss=47.992, backward_time=0.296, grad_norm=38.458, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.831e-04, train_time=1.459 +[gpub011:0/16] 2024-02-06 09:23:52,399 (trainer:762) INFO: 30epoch:train:12701-12800batch: iter_time=0.001, forward_time=0.449, loss_ctc=45.300, loss_att=40.562, acc=0.757, loss=41.984, backward_time=0.322, grad_norm=36.762, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.831e-04, train_time=1.557 +[gpub011:0/16] 2024-02-06 09:26:03,544 (trainer:762) 
INFO: 30epoch:train:12801-12900batch: iter_time=8.403e-05, forward_time=0.289, loss_ctc=41.518, loss_att=41.148, acc=0.760, loss=41.259, backward_time=0.294, grad_norm=37.903, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.830e-04, train_time=1.311 +[gpub011:0/16] 2024-02-06 09:28:25,209 (trainer:762) INFO: 30epoch:train:12901-13000batch: iter_time=8.690e-05, forward_time=0.290, loss_ctc=43.781, loss_att=45.172, acc=0.767, loss=44.755, backward_time=0.295, grad_norm=34.972, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.830e-04, train_time=1.416 +[gpub011:0/16] 2024-02-06 09:31:14,194 (trainer:762) INFO: 30epoch:train:13001-13100batch: iter_time=8.599e-05, forward_time=0.515, loss_ctc=49.911, loss_att=42.775, acc=0.766, loss=44.916, backward_time=0.353, grad_norm=38.295, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.830e-04, train_time=1.690 +[gpub011:0/16] 2024-02-06 09:33:25,267 (trainer:762) INFO: 30epoch:train:13101-13200batch: iter_time=9.040e-05, forward_time=0.289, loss_ctc=50.433, loss_att=43.396, acc=0.752, loss=45.507, backward_time=0.295, grad_norm=40.951, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.830e-04, train_time=1.311 +[gpub011:0/16] 2024-02-06 09:35:55,716 (trainer:762) INFO: 30epoch:train:13201-13300batch: iter_time=8.762e-05, forward_time=0.290, loss_ctc=48.077, loss_att=46.210, acc=0.755, loss=46.770, backward_time=0.295, grad_norm=42.455, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.829e-04, train_time=1.504 +[gpub011:0/16] 2024-02-06 09:38:23,692 (trainer:762) INFO: 30epoch:train:13301-13400batch: iter_time=8.582e-05, forward_time=0.291, loss_ctc=44.852, loss_att=42.920, acc=0.776, loss=43.500, backward_time=0.298, grad_norm=36.613, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.829e-04, train_time=1.480 +[gpub011:0/16] 2024-02-06 09:40:53,463 (trainer:762) INFO: 
30epoch:train:13401-13500batch: iter_time=8.004e-05, forward_time=0.333, loss_ctc=52.710, loss_att=51.665, acc=0.750, loss=51.978, backward_time=0.372, grad_norm=44.767, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.829e-04, train_time=1.497 +[gpub011:0/16] 2024-02-06 09:43:06,272 (trainer:762) INFO: 30epoch:train:13501-13600batch: iter_time=8.352e-05, forward_time=0.290, loss_ctc=46.157, loss_att=44.471, acc=0.744, loss=44.977, backward_time=0.295, grad_norm=37.442, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.829e-04, train_time=1.328 +[gpub011:0/16] 2024-02-06 09:45:13,388 (trainer:762) INFO: 30epoch:train:13601-13700batch: iter_time=8.544e-05, forward_time=0.289, loss_ctc=46.340, loss_att=40.871, acc=0.768, loss=42.511, backward_time=0.296, grad_norm=38.366, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.829e-04, train_time=1.271 +[gpub011:0/16] 2024-02-06 09:47:01,124 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-06 09:47:20,385 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 09:47:23,942 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 09:47:23,942 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-06 09:47:23,946 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 09:54:09,592 (trainer:762) INFO: 30epoch:train:13701-13800batch: iter_time=3.719, forward_time=0.444, loss_ctc=46.813, loss_att=45.907, acc=0.776, loss=46.179, backward_time=0.330, grad_norm=37.749, clip=100.000, loss_scale=3.141e+33, optim_step_time=0.097, optim0_lr0=1.828e-04, train_time=5.361 +[gpub011:0/16] 2024-02-06 09:56:32,396 (trainer:762) INFO: 30epoch:train:13801-13900batch: iter_time=7.751e-05, forward_time=0.289, loss_ctc=44.759, loss_att=48.390, acc=0.767, loss=47.301, backward_time=0.295, grad_norm=44.080, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.828e-04, train_time=1.429 +[gpub011:0/16] 2024-02-06 09:58:52,470 (trainer:762) INFO: 30epoch:train:13901-14000batch: iter_time=8.165e-05, forward_time=0.313, loss_ctc=48.657, loss_att=47.070, acc=0.764, loss=47.546, backward_time=0.302, grad_norm=37.648, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.828e-04, train_time=1.401 +[gpub011:0/16] 2024-02-06 10:01:44,119 (trainer:762) 
INFO: 30epoch:train:14001-14100batch: iter_time=8.176e-05, forward_time=0.363, loss_ctc=42.165, loss_att=38.718, acc=0.774, loss=39.752, backward_time=0.341, grad_norm=36.704, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.828e-04, train_time=1.716 +[gpub011:0/16] 2024-02-06 10:03:48,929 (trainer:762) INFO: 30epoch:train:14101-14200batch: iter_time=7.932e-05, forward_time=0.295, loss_ctc=43.125, loss_att=44.285, acc=0.756, loss=43.937, backward_time=0.296, grad_norm=36.831, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.828e-04, train_time=1.248 +[gpub011:0/16] 2024-02-06 10:06:35,762 (trainer:762) INFO: 30epoch:train:14201-14300batch: iter_time=8.344e-05, forward_time=0.414, loss_ctc=47.178, loss_att=45.795, acc=0.777, loss=46.210, backward_time=0.327, grad_norm=36.777, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.827e-04, train_time=1.668 +[gpub011:0/16] 2024-02-06 10:08:52,006 (trainer:762) INFO: 30epoch:train:14301-14400batch: iter_time=8.587e-05, forward_time=0.290, loss_ctc=48.274, loss_att=41.661, acc=0.768, loss=43.645, backward_time=0.295, grad_norm=37.458, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.827e-04, train_time=1.362 +[gpub011:0/16] 2024-02-06 10:11:20,294 (trainer:762) INFO: 30epoch:train:14401-14500batch: iter_time=8.385e-05, forward_time=0.293, loss_ctc=49.750, loss_att=47.063, acc=0.753, loss=47.869, backward_time=0.299, grad_norm=41.863, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.827e-04, train_time=1.483 +[gpub011:0/16] 2024-02-06 10:14:02,851 (trainer:762) INFO: 30epoch:train:14501-14600batch: iter_time=8.403e-05, forward_time=0.417, loss_ctc=44.785, loss_att=44.067, acc=0.771, loss=44.282, backward_time=0.369, grad_norm=38.945, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.827e-04, train_time=1.625 +[gpub011:0/16] 2024-02-06 10:16:26,634 (trainer:762) INFO: 
30epoch:train:14601-14700batch: iter_time=8.350e-05, forward_time=0.293, loss_ctc=53.148, loss_att=52.790, acc=0.757, loss=52.897, backward_time=0.300, grad_norm=40.893, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.827e-04, train_time=1.437 +[gpub011:0/16] 2024-02-06 10:18:59,773 (trainer:762) INFO: 30epoch:train:14701-14800batch: iter_time=0.003, forward_time=0.405, loss_ctc=46.840, loss_att=43.049, acc=0.760, loss=44.186, backward_time=0.349, grad_norm=36.740, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.826e-04, train_time=1.532 +[gpub011:0/16] 2024-02-06 10:21:32,812 (trainer:762) INFO: 30epoch:train:14801-14900batch: iter_time=8.412e-05, forward_time=0.290, loss_ctc=47.045, loss_att=45.376, acc=0.773, loss=45.877, backward_time=0.296, grad_norm=37.944, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.826e-04, train_time=1.530 +[gpub011:0/16] 2024-02-06 10:23:47,137 (trainer:762) INFO: 30epoch:train:14901-15000batch: iter_time=8.294e-05, forward_time=0.290, loss_ctc=45.733, loss_att=42.670, acc=0.770, loss=43.589, backward_time=0.296, grad_norm=38.672, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.826e-04, train_time=1.342 +[gpub011:0/16] 2024-02-06 11:03:10,452 (trainer:361) INFO: 30epoch results: [train] iter_time=0.284, forward_time=0.333, loss_ctc=47.645, loss_att=45.083, acc=0.761, loss=45.851, backward_time=0.317, grad_norm=39.317, clip=100.000, loss_scale=6.153e+33, optim_step_time=0.097, optim0_lr0=1.841e-04, train_time=1.686, time=7 hours, 1 minute and 52.35 seconds, total_count=480000, gpu_max_cached_mem_GB=40.000, [valid] loss_ctc=36.219, cer_ctc=0.191, loss_att=37.548, acc=0.675, cer=0.339, wer=0.999, loss=37.149, time=38 minutes and 58.39 seconds, total_count=149472, gpu_max_cached_mem_GB=40.000 +[gpub011:0/16] 2024-02-06 11:03:20,048 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub011:0/16] 2024-02-06 11:03:20,134 
(trainer:290) INFO: 31/45epoch started. Estimated time to finish: 4 days, 21 hours and 6 minutes +[gpub011:0/16] 2024-02-06 11:03:20,144 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-06 11:03:39,110 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 11:03:42,661 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 11:03:42,661 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-06 11:03:42,664 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 11:11:08,489 (trainer:762) INFO: 31epoch:train:1-100batch: iter_time=3.486, forward_time=0.319, loss_ctc=39.512, loss_att=37.717, acc=0.738, loss=38.255, backward_time=0.304, grad_norm=39.708, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.826e-04, train_time=4.683 +[gpub011:0/16] 2024-02-06 11:13:22,318 (trainer:762) INFO: 31epoch:train:101-200batch: iter_time=6.913e-04, forward_time=0.340, loss_ctc=52.914, loss_att=44.821, acc=0.747, loss=47.249, backward_time=0.315, grad_norm=41.858, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.826e-04, train_time=1.338 +[gpub011:0/16] 2024-02-06 11:16:07,776 (trainer:762) INFO: 31epoch:train:201-300batch: iter_time=7.880e-05, forward_time=0.355, loss_ctc=48.386, loss_att=49.342, acc=0.739, loss=49.055, 
backward_time=0.331, grad_norm=40.974, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.825e-04, train_time=1.654 +[gpub011:0/16] 2024-02-06 11:18:36,956 (trainer:762) INFO: 31epoch:train:301-400batch: iter_time=7.726e-05, forward_time=0.333, loss_ctc=51.856, loss_att=48.773, acc=0.739, loss=49.698, backward_time=0.305, grad_norm=44.899, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.825e-04, train_time=1.492 +[gpub011:0/16] 2024-02-06 11:21:29,206 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 11:22:42,316 (trainer:762) INFO: 31epoch:train:401-500batch: iter_time=8.026e-05, forward_time=0.460, loss_ctc=50.585, loss_att=53.152, acc=0.744, loss=52.382, backward_time=0.585, grad_norm=41.523, clip=100.000, loss_scale=4.353e+33, optim_step_time=0.118, optim0_lr0=1.825e-04, train_time=2.453 +[gpub011:0/16] 2024-02-06 11:25:01,997 (trainer:762) INFO: 31epoch:train:501-600batch: iter_time=8.161e-05, forward_time=0.305, loss_ctc=47.830, loss_att=43.851, acc=0.759, loss=45.044, backward_time=0.320, grad_norm=42.656, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.825e-04, train_time=1.396 +[gpub011:0/16] 2024-02-06 11:27:22,659 (trainer:762) INFO: 31epoch:train:601-700batch: iter_time=7.906e-05, forward_time=0.398, loss_ctc=56.433, loss_att=45.007, acc=0.754, loss=48.435, backward_time=0.303, grad_norm=53.122, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.825e-04, train_time=1.407 +[gpub011:0/16] 2024-02-06 11:30:49,375 (trainer:762) INFO: 31epoch:train:701-800batch: iter_time=0.090, forward_time=0.497, loss_ctc=44.310, loss_att=36.781, acc=0.759, loss=39.040, backward_time=0.449, grad_norm=38.782, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=1.824e-04, train_time=2.062 +[gpub011:0/16] 2024-02-06 11:33:10,673 (trainer:762) INFO: 31epoch:train:801-900batch: iter_time=8.977e-05, forward_time=0.287, 
loss_ctc=46.573, loss_att=37.135, acc=0.776, loss=39.966, backward_time=0.295, grad_norm=39.919, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.824e-04, train_time=1.417 +[gpub011:0/16] 2024-02-06 11:35:39,746 (trainer:762) INFO: 31epoch:train:901-1000batch: iter_time=8.609e-05, forward_time=0.432, loss_ctc=43.272, loss_att=40.438, acc=0.748, loss=41.288, backward_time=0.339, grad_norm=37.657, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.128, optim0_lr0=1.824e-04, train_time=1.491 +[gpub011:0/16] 2024-02-06 11:38:28,274 (trainer:762) INFO: 31epoch:train:1001-1100batch: iter_time=8.430e-05, forward_time=0.464, loss_ctc=52.390, loss_att=58.160, acc=0.737, loss=56.429, backward_time=0.324, grad_norm=43.495, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=1.824e-04, train_time=1.685 +[gpub011:0/16] 2024-02-06 11:40:38,631 (trainer:762) INFO: 31epoch:train:1101-1200batch: iter_time=7.925e-05, forward_time=0.289, loss_ctc=57.569, loss_att=54.934, acc=0.729, loss=55.724, backward_time=0.297, grad_norm=49.960, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.824e-04, train_time=1.303 +[gpub011:0/16] 2024-02-06 11:41:54,607 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-06 11:42:13,955 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 11:42:17,502 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 11:42:17,502 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-06 11:42:17,505 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 11:50:27,503 (trainer:762) INFO: 31epoch:train:1201-1300batch: iter_time=3.924, forward_time=0.302, loss_ctc=42.638, loss_att=39.397, acc=0.746, loss=40.369, backward_time=0.353, grad_norm=37.864, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.130, optim0_lr0=1.823e-04, train_time=5.888 +[gpub011:0/16] 2024-02-06 11:52:52,842 (trainer:762) INFO: 31epoch:train:1301-1400batch: iter_time=8.303e-05, forward_time=0.407, loss_ctc=51.527, loss_att=45.502, acc=0.737, loss=47.309, backward_time=0.334, grad_norm=44.855, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.823e-04, train_time=1.453 +[gpub011:0/16] 2024-02-06 11:55:23,226 (trainer:762) INFO: 31epoch:train:1401-1500batch: iter_time=7.968e-05, forward_time=0.287, loss_ctc=47.448, loss_att=44.191, acc=0.745, loss=45.168, backward_time=0.295, grad_norm=39.249, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.823e-04, train_time=1.504 +[gpub011:0/16] 2024-02-06 11:57:20,977 (trainer:762) INFO: 
31epoch:train:1501-1600batch: iter_time=8.550e-05, forward_time=0.302, loss_ctc=40.200, loss_att=38.728, acc=0.757, loss=39.169, backward_time=0.302, grad_norm=36.080, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.823e-04, train_time=1.177 +[gpub011:0/16] 2024-02-06 12:00:06,559 (trainer:762) INFO: 31epoch:train:1601-1700batch: iter_time=8.749e-05, forward_time=0.423, loss_ctc=55.831, loss_att=52.818, acc=0.751, loss=53.722, backward_time=0.340, grad_norm=44.642, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.109, optim0_lr0=1.823e-04, train_time=1.655 +[gpub011:0/16] 2024-02-06 12:02:13,840 (trainer:762) INFO: 31epoch:train:1701-1800batch: iter_time=8.688e-05, forward_time=0.290, loss_ctc=52.581, loss_att=50.601, acc=0.748, loss=51.195, backward_time=0.296, grad_norm=43.687, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.822e-04, train_time=1.273 +[gpub011:0/16] 2024-02-06 12:04:13,080 (trainer:762) INFO: 31epoch:train:1801-1900batch: iter_time=9.184e-05, forward_time=0.299, loss_ctc=50.098, loss_att=47.387, acc=0.747, loss=48.200, backward_time=0.317, grad_norm=45.493, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.822e-04, train_time=1.192 +[gpub011:0/16] 2024-02-06 12:07:00,135 (trainer:762) INFO: 31epoch:train:1901-2000batch: iter_time=2.059e-04, forward_time=0.392, loss_ctc=50.099, loss_att=40.247, acc=0.763, loss=43.202, backward_time=0.311, grad_norm=38.524, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.116, optim0_lr0=1.822e-04, train_time=1.670 +[gpub011:0/16] 2024-02-06 12:09:07,998 (trainer:762) INFO: 31epoch:train:2001-2100batch: iter_time=7.955e-05, forward_time=0.289, loss_ctc=46.396, loss_att=35.528, acc=0.776, loss=38.788, backward_time=0.294, grad_norm=42.479, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.822e-04, train_time=1.278 +[gpub011:0/16] 2024-02-06 12:11:15,023 (trainer:762) INFO: 31epoch:train:2101-2200batch: 
iter_time=8.555e-05, forward_time=0.291, loss_ctc=42.409, loss_att=40.230, acc=0.754, loss=40.884, backward_time=0.298, grad_norm=37.564, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.822e-04, train_time=1.270 +[gpub011:0/16] 2024-02-06 12:13:25,355 (trainer:762) INFO: 31epoch:train:2201-2300batch: iter_time=8.318e-05, forward_time=0.319, loss_ctc=44.402, loss_att=45.404, acc=0.757, loss=45.103, backward_time=0.308, grad_norm=36.231, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.821e-04, train_time=1.303 +[gpub011:0/16] 2024-02-06 12:16:13,400 (trainer:762) INFO: 31epoch:train:2301-2400batch: iter_time=8.104e-05, forward_time=0.388, loss_ctc=49.009, loss_att=53.915, acc=0.734, loss=52.443, backward_time=0.307, grad_norm=43.598, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.106, optim0_lr0=1.821e-04, train_time=1.680 +[gpub011:0/16] 2024-02-06 12:18:07,121 (trainer:762) INFO: 31epoch:train:2401-2500batch: iter_time=8.248e-05, forward_time=0.290, loss_ctc=58.451, loss_att=53.397, acc=0.732, loss=54.913, backward_time=0.297, grad_norm=53.613, clip=100.000, loss_scale=3.427e+33, optim_step_time=0.094, optim0_lr0=1.821e-04, train_time=1.137 +[gpub011:0/16] 2024-02-06 12:18:27,173 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-06 12:18:46,057 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 12:18:49,574 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 12:18:49,574 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-06 12:18:49,615 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 12:26:45,113 (trainer:762) INFO: 31epoch:train:2501-2600batch: iter_time=3.843, forward_time=0.304, loss_ctc=37.998, loss_att=36.672, acc=0.755, loss=37.069, backward_time=0.299, grad_norm=39.178, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.821e-04, train_time=5.180 +[gpub011:0/16] 2024-02-06 12:29:18,941 (trainer:762) INFO: 31epoch:train:2601-2700batch: iter_time=8.197e-05, forward_time=0.367, loss_ctc=50.420, loss_att=44.466, acc=0.762, loss=46.253, backward_time=0.326, grad_norm=40.981, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=1.821e-04, train_time=1.538 +[gpub011:0/16] 2024-02-06 12:31:31,333 (trainer:762) INFO: 31epoch:train:2701-2800batch: iter_time=8.282e-05, forward_time=0.287, loss_ctc=46.886, loss_att=49.319, acc=0.748, loss=48.589, backward_time=0.296, grad_norm=40.425, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.820e-04, train_time=1.324 +[gpub011:0/16] 2024-02-06 12:33:40,674 (trainer:762) INFO: 
31epoch:train:2801-2900batch: iter_time=8.199e-05, forward_time=0.305, loss_ctc=50.692, loss_att=48.629, acc=0.749, loss=49.248, backward_time=0.304, grad_norm=43.297, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.820e-04, train_time=1.293 +[gpub011:0/16] 2024-02-06 12:36:09,694 (trainer:762) INFO: 31epoch:train:2901-3000batch: iter_time=8.074e-05, forward_time=0.406, loss_ctc=49.621, loss_att=55.127, acc=0.751, loss=53.475, backward_time=0.321, grad_norm=41.805, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.820e-04, train_time=1.490 +[gpub011:0/16] 2024-02-06 12:38:36,647 (trainer:762) INFO: 31epoch:train:3001-3100batch: iter_time=8.013e-05, forward_time=0.320, loss_ctc=46.983, loss_att=44.247, acc=0.769, loss=45.067, backward_time=0.294, grad_norm=40.887, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.820e-04, train_time=1.469 +[gpub011:0/16] 2024-02-06 12:40:39,739 (trainer:762) INFO: 31epoch:train:3101-3200batch: iter_time=8.205e-05, forward_time=0.305, loss_ctc=54.334, loss_att=46.808, acc=0.760, loss=49.066, backward_time=0.310, grad_norm=51.067, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.820e-04, train_time=1.231 +[gpub011:0/16] 2024-02-06 12:43:12,847 (trainer:762) INFO: 31epoch:train:3201-3300batch: iter_time=8.805e-05, forward_time=0.286, loss_ctc=43.182, loss_att=35.815, acc=0.767, loss=38.025, backward_time=0.292, grad_norm=36.832, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.819e-04, train_time=1.531 +[gpub011:0/16] 2024-02-06 12:45:31,275 (trainer:762) INFO: 31epoch:train:3301-3400batch: iter_time=8.395e-05, forward_time=0.454, loss_ctc=45.130, loss_att=36.438, acc=0.783, loss=39.046, backward_time=0.314, grad_norm=38.785, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.819e-04, train_time=1.384 +[gpub011:0/16] 2024-02-06 12:48:00,199 (trainer:762) INFO: 31epoch:train:3401-3500batch: 
iter_time=8.848e-05, forward_time=0.298, loss_ctc=42.263, loss_att=39.202, acc=0.758, loss=40.120, backward_time=0.305, grad_norm=35.833, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.819e-04, train_time=1.489 +[gpub011:0/16] 2024-02-06 12:50:19,337 (trainer:762) INFO: 31epoch:train:3501-3600batch: iter_time=8.466e-05, forward_time=0.294, loss_ctc=51.069, loss_att=61.326, acc=0.741, loss=58.249, backward_time=0.302, grad_norm=43.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.819e-04, train_time=1.391 +[gpub011:0/16] 2024-02-06 12:52:57,368 (trainer:762) INFO: 31epoch:train:3601-3700batch: iter_time=1.951e-04, forward_time=0.411, loss_ctc=53.764, loss_att=53.576, acc=0.741, loss=53.632, backward_time=0.323, grad_norm=46.902, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.819e-04, train_time=1.580 +[gpub011:0/16] 2024-02-06 12:54:15,778 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub011:0/16] 2024-02-06 12:54:35,317 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 12:54:38,886 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 12:54:38,886 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-06 12:54:38,889 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 13:01:03,376 
(trainer:762) INFO: 31epoch:train:3701-3800batch: iter_time=3.581, forward_time=0.289, loss_ctc=41.835, loss_att=39.278, acc=0.752, loss=40.045, backward_time=0.295, grad_norm=36.674, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.818e-04, train_time=4.860 +[gpub011:0/16] 2024-02-06 13:03:17,756 (trainer:762) INFO: 31epoch:train:3801-3900batch: iter_time=7.806e-05, forward_time=0.299, loss_ctc=50.179, loss_att=46.308, acc=0.737, loss=47.469, backward_time=0.304, grad_norm=42.915, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.818e-04, train_time=1.343 +[gpub011:0/16] 2024-02-06 13:05:39,033 (trainer:762) INFO: 31epoch:train:3901-4000batch: iter_time=7.585e-05, forward_time=0.467, loss_ctc=46.804, loss_att=44.258, acc=0.749, loss=45.022, backward_time=0.313, grad_norm=39.153, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.818e-04, train_time=1.413 +[gpub011:0/16] 2024-02-06 13:07:48,061 (trainer:762) INFO: 31epoch:train:4001-4100batch: iter_time=7.780e-05, forward_time=0.288, loss_ctc=39.902, loss_att=38.365, acc=0.759, loss=38.826, backward_time=0.294, grad_norm=35.260, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.818e-04, train_time=1.290 +[gpub011:0/16] 2024-02-06 13:10:05,192 (trainer:762) INFO: 31epoch:train:4101-4200batch: iter_time=7.816e-05, forward_time=0.305, loss_ctc=54.710, loss_att=52.727, acc=0.754, loss=53.322, backward_time=0.309, grad_norm=44.827, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.818e-04, train_time=1.372 +[gpub011:0/16] 2024-02-06 13:12:34,991 (trainer:762) INFO: 31epoch:train:4201-4300batch: iter_time=7.588e-05, forward_time=0.342, loss_ctc=51.905, loss_att=50.059, acc=0.754, loss=50.613, backward_time=0.371, grad_norm=43.494, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.817e-04, train_time=1.498 +[gpub011:0/16] 2024-02-06 13:15:08,531 (trainer:762) INFO: 
31epoch:train:4301-4400batch: iter_time=7.575e-05, forward_time=0.287, loss_ctc=49.338, loss_att=47.444, acc=0.747, loss=48.012, backward_time=0.293, grad_norm=51.503, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.817e-04, train_time=1.535 +[gpub011:0/16] 2024-02-06 13:17:03,543 (trainer:762) INFO: 31epoch:train:4401-4500batch: iter_time=7.534e-05, forward_time=0.300, loss_ctc=49.239, loss_att=39.774, acc=0.765, loss=42.614, backward_time=0.316, grad_norm=38.903, clip=100.000, loss_scale=6.854e+33, optim_step_time=0.097, optim0_lr0=1.817e-04, train_time=1.150 +[gpub011:0/16] 2024-02-06 13:19:23,720 (trainer:762) INFO: 31epoch:train:4501-4600batch: iter_time=7.534e-05, forward_time=0.290, loss_ctc=45.852, loss_att=35.606, acc=0.777, loss=38.680, backward_time=0.294, grad_norm=40.232, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.817e-04, train_time=1.402 +[gpub011:0/16] 2024-02-06 13:22:01,339 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-06 13:22:01,347 (trainer:762) INFO: 31epoch:train:4601-4700batch: iter_time=1.726e-04, forward_time=0.397, loss_ctc=42.571, loss_att=40.453, acc=0.756, loss=41.089, backward_time=0.310, grad_norm=38.017, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=1.817e-04, train_time=1.576 +[gpub011:0/16] 2024-02-06 13:24:14,429 (trainer:762) INFO: 31epoch:train:4701-4800batch: iter_time=7.624e-05, forward_time=0.290, loss_ctc=44.398, loss_att=45.377, acc=0.760, loss=45.083, backward_time=0.294, grad_norm=36.673, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.816e-04, train_time=1.331 +[gpub011:0/16] 2024-02-06 13:26:08,216 (trainer:762) INFO: 31epoch:train:4801-4900batch: iter_time=7.709e-05, forward_time=0.300, loss_ctc=49.156, loss_att=53.761, acc=0.734, loss=52.380, backward_time=0.310, grad_norm=44.168, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.816e-04, train_time=1.138 +[gpub011:0/16] 2024-02-06 13:29:05,826 (trainer:762) INFO: 31epoch:train:4901-5000batch: iter_time=4.804e-04, forward_time=0.376, loss_ctc=57.858, loss_att=53.990, acc=0.734, loss=55.150, backward_time=0.330, grad_norm=53.003, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.816e-04, train_time=1.776 +[gpub011:0/16] 2024-02-06 13:29:25,855 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub011:0/16] 2024-02-06 13:29:45,272 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 13:29:49,117 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 13:29:49,117 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-06 13:29:49,121 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 13:36:49,805 (trainer:762) INFO: 31epoch:train:5001-5100batch: iter_time=3.452, forward_time=0.299, loss_ctc=37.812, loss_att=35.812, acc=0.748, loss=36.412, backward_time=0.304, grad_norm=38.909, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.816e-04, train_time=4.640 +[gpub011:0/16] 2024-02-06 13:39:16,768 (trainer:762) INFO: 31epoch:train:5101-5200batch: iter_time=8.644e-05, forward_time=0.339, loss_ctc=50.369, loss_att=43.061, acc=0.757, loss=45.253, backward_time=0.350, grad_norm=38.523, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.816e-04, train_time=1.469 +[gpub011:0/16] 2024-02-06 13:41:37,305 (trainer:762) INFO: 31epoch:train:5201-5300batch: iter_time=7.890e-05, forward_time=0.297, loss_ctc=46.328, loss_att=47.805, acc=0.745, loss=47.362, backward_time=0.308, grad_norm=38.363, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.815e-04, train_time=1.405 +[gpub011:0/16] 2024-02-06 13:43:41,067 (trainer:762) 
INFO: 31epoch:train:5301-5400batch: iter_time=7.778e-05, forward_time=0.288, loss_ctc=49.678, loss_att=47.161, acc=0.747, loss=47.916, backward_time=0.295, grad_norm=40.806, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.815e-04, train_time=1.237 +[gpub011:0/16] 2024-02-06 13:46:09,213 (trainer:762) INFO: 31epoch:train:5401-5500batch: iter_time=4.080e-04, forward_time=0.412, loss_ctc=49.407, loss_att=52.205, acc=0.750, loss=51.366, backward_time=0.345, grad_norm=42.139, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.815e-04, train_time=1.481 +[gpub011:0/16] 2024-02-06 13:48:44,799 (trainer:762) INFO: 31epoch:train:5501-5600batch: iter_time=8.000e-05, forward_time=0.289, loss_ctc=46.311, loss_att=42.807, acc=0.766, loss=43.858, backward_time=0.294, grad_norm=40.917, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.815e-04, train_time=1.556 +[gpub011:0/16] 2024-02-06 13:51:01,571 (trainer:762) INFO: 31epoch:train:5601-5700batch: iter_time=8.645e-05, forward_time=0.415, loss_ctc=53.854, loss_att=44.223, acc=0.759, loss=47.112, backward_time=0.320, grad_norm=49.014, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.127, optim0_lr0=1.815e-04, train_time=1.367 +[gpub011:0/16] 2024-02-06 13:53:11,285 (trainer:762) INFO: 31epoch:train:5701-5800batch: iter_time=8.216e-05, forward_time=0.291, loss_ctc=43.178, loss_att=35.799, acc=0.765, loss=38.013, backward_time=0.296, grad_norm=38.169, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.814e-04, train_time=1.297 +[gpub011:0/16] 2024-02-06 13:55:44,522 (trainer:762) INFO: 31epoch:train:5801-5900batch: iter_time=8.869e-05, forward_time=0.377, loss_ctc=44.646, loss_att=35.554, acc=0.782, loss=38.282, backward_time=0.314, grad_norm=37.969, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.814e-04, train_time=1.532 +[gpub011:0/16] 2024-02-06 13:57:46,310 (trainer:762) INFO: 31epoch:train:5901-6000batch: 
iter_time=7.895e-05, forward_time=0.296, loss_ctc=42.304, loss_att=39.392, acc=0.755, loss=40.265, backward_time=0.303, grad_norm=35.737, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.814e-04, train_time=1.218 +[gpub011:0/16] 2024-02-06 14:00:16,395 (trainer:762) INFO: 31epoch:train:6001-6100batch: iter_time=7.632e-05, forward_time=0.420, loss_ctc=51.161, loss_att=57.067, acc=0.743, loss=55.295, backward_time=0.350, grad_norm=41.251, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.814e-04, train_time=1.500 +[gpub011:0/16] 2024-02-06 14:02:24,940 (trainer:762) INFO: 31epoch:train:6101-6200batch: iter_time=7.840e-05, forward_time=0.308, loss_ctc=53.164, loss_att=53.192, acc=0.736, loss=53.183, backward_time=0.316, grad_norm=49.807, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.814e-04, train_time=1.286 +[gpub011:0/16] 2024-02-06 14:04:09,928 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub011:0/16] 2024-02-06 14:04:29,182 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 14:04:32,782 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 14:04:32,782 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-06 14:04:32,786 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 14:11:37,472 
(trainer:762) INFO: 31epoch:train:6201-6300batch: iter_time=3.935, forward_time=0.287, loss_ctc=41.848, loss_att=39.060, acc=0.756, loss=39.896, backward_time=0.292, grad_norm=38.221, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.813e-04, train_time=5.525 +[gpub011:0/16] 2024-02-06 14:14:16,550 (trainer:762) INFO: 31epoch:train:6301-6400batch: iter_time=7.526e-05, forward_time=0.382, loss_ctc=48.998, loss_att=45.633, acc=0.752, loss=46.643, backward_time=0.342, grad_norm=42.493, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.813e-04, train_time=1.590 +[gpub011:0/16] 2024-02-06 14:16:17,035 (trainer:762) INFO: 31epoch:train:6401-6500batch: iter_time=8.238e-05, forward_time=0.304, loss_ctc=46.549, loss_att=45.764, acc=0.755, loss=45.999, backward_time=0.324, grad_norm=38.777, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.813e-04, train_time=1.205 +[gpub011:0/16] 2024-02-06 14:19:05,940 (trainer:762) INFO: 31epoch:train:6501-6600batch: iter_time=8.175e-05, forward_time=0.285, loss_ctc=39.560, loss_att=39.535, acc=0.763, loss=39.542, backward_time=0.292, grad_norm=37.390, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.813e-04, train_time=1.689 +[gpub011:0/16] 2024-02-06 14:21:24,463 (trainer:762) INFO: 31epoch:train:6601-6700batch: iter_time=7.994e-05, forward_time=0.291, loss_ctc=54.845, loss_att=52.747, acc=0.763, loss=53.376, backward_time=0.298, grad_norm=45.110, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.813e-04, train_time=1.385 +[gpub011:0/16] 2024-02-06 14:21:30,867 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-06 14:24:13,727 (trainer:762) INFO: 31epoch:train:6701-6800batch: iter_time=8.736e-05, forward_time=0.388, loss_ctc=51.856, loss_att=53.328, acc=0.753, loss=52.886, backward_time=0.354, grad_norm=44.615, clip=100.000, loss_scale=5.455e+33, optim_step_time=0.105, optim0_lr0=1.812e-04, train_time=1.692 +[gpub011:0/16] 2024-02-06 14:26:37,595 (trainer:762) INFO: 31epoch:train:6801-6900batch: iter_time=8.336e-05, forward_time=0.290, loss_ctc=48.622, loss_att=48.943, acc=0.758, loss=48.847, backward_time=0.295, grad_norm=48.436, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.812e-04, train_time=1.439 +[gpub011:0/16] 2024-02-06 14:27:16,624 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 14:29:12,966 (trainer:762) INFO: 31epoch:train:6901-7000batch: iter_time=8.132e-05, forward_time=0.288, loss_ctc=48.737, loss_att=40.050, acc=0.770, loss=42.656, backward_time=0.294, grad_norm=37.346, clip=100.000, loss_scale=3.147e+33, optim_step_time=0.094, optim0_lr0=1.812e-04, train_time=1.554 +[gpub011:0/16] 2024-02-06 14:31:44,490 (trainer:762) INFO: 31epoch:train:7001-7100batch: iter_time=8.351e-05, forward_time=0.380, loss_ctc=44.861, loss_att=35.246, acc=0.785, loss=38.131, backward_time=0.365, grad_norm=39.662, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.812e-04, train_time=1.515 +[gpub011:0/16] 2024-02-06 14:34:14,405 (trainer:762) INFO: 31epoch:train:7101-7200batch: iter_time=8.261e-05, forward_time=0.305, loss_ctc=41.566, loss_att=39.824, acc=0.762, loss=40.347, backward_time=0.305, grad_norm=39.005, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.812e-04, train_time=1.499 +[gpub011:0/16] 2024-02-06 14:36:29,736 (trainer:762) INFO: 31epoch:train:7201-7300batch: iter_time=8.633e-05, forward_time=0.290, loss_ctc=44.066, loss_att=47.696, acc=0.761, loss=46.607, backward_time=0.299, grad_norm=35.406, clip=100.000, 
loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.811e-04, train_time=1.353 +[gpub011:0/16] 2024-02-06 14:39:09,834 (trainer:762) INFO: 31epoch:train:7301-7400batch: iter_time=8.354e-05, forward_time=0.291, loss_ctc=48.911, loss_att=55.709, acc=0.739, loss=53.670, backward_time=0.296, grad_norm=44.185, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.811e-04, train_time=1.601 +[gpub011:0/16] 2024-02-06 14:41:38,892 (trainer:762) INFO: 31epoch:train:7401-7500batch: iter_time=5.695e-04, forward_time=0.405, loss_ctc=56.566, loss_att=55.057, acc=0.736, loss=55.510, backward_time=0.318, grad_norm=51.636, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.127, optim0_lr0=1.811e-04, train_time=1.490 +[gpub011:0/16] 2024-02-06 14:41:58,921 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub011:0/16] 2024-02-06 14:42:18,320 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 14:42:21,882 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 14:42:21,882 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-06 14:42:21,962 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 14:50:48,250 (trainer:762) INFO: 31epoch:train:7501-7600batch: iter_time=3.987, forward_time=0.302, loss_ctc=37.296, loss_att=36.226, acc=0.749, loss=36.547, 
backward_time=0.299, grad_norm=36.292, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.811e-04, train_time=5.493 +[gpub011:0/16] 2024-02-06 14:53:30,296 (trainer:762) INFO: 31epoch:train:7601-7700batch: iter_time=7.900e-05, forward_time=0.437, loss_ctc=49.284, loss_att=43.471, acc=0.755, loss=45.215, backward_time=0.311, grad_norm=39.221, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.108, optim0_lr0=1.811e-04, train_time=1.620 +[gpub011:0/16] 2024-02-06 14:55:50,047 (trainer:762) INFO: 31epoch:train:7701-7800batch: iter_time=7.903e-05, forward_time=0.306, loss_ctc=46.910, loss_att=48.234, acc=0.747, loss=47.836, backward_time=0.315, grad_norm=40.855, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.810e-04, train_time=1.398 +[gpub011:0/16] 2024-02-06 14:58:49,409 (trainer:762) INFO: 31epoch:train:7801-7900batch: iter_time=8.766e-05, forward_time=0.395, loss_ctc=48.999, loss_att=46.910, acc=0.748, loss=47.537, backward_time=0.302, grad_norm=39.963, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.105, optim0_lr0=1.810e-04, train_time=1.794 +[gpub011:0/16] 2024-02-06 15:00:49,932 (trainer:762) INFO: 31epoch:train:7901-8000batch: iter_time=7.806e-05, forward_time=0.340, loss_ctc=49.221, loss_att=51.850, acc=0.752, loss=51.061, backward_time=0.324, grad_norm=40.872, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.810e-04, train_time=1.205 +[gpub011:0/16] 2024-02-06 15:03:31,592 (trainer:762) INFO: 31epoch:train:8001-8100batch: iter_time=8.189e-05, forward_time=0.288, loss_ctc=45.662, loss_att=43.124, acc=0.765, loss=43.886, backward_time=0.293, grad_norm=39.911, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.810e-04, train_time=1.617 +[gpub011:0/16] 2024-02-06 15:06:11,427 (trainer:762) INFO: 31epoch:train:8101-8200batch: iter_time=9.314e-05, forward_time=0.428, loss_ctc=53.845, loss_att=43.961, acc=0.762, loss=46.926, backward_time=0.331, grad_norm=49.461, 
clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=1.810e-04, train_time=1.598 +[gpub011:0/16] 2024-02-06 15:08:25,098 (trainer:762) INFO: 31epoch:train:8201-8300batch: iter_time=9.601e-05, forward_time=0.288, loss_ctc=42.493, loss_att=36.073, acc=0.765, loss=37.999, backward_time=0.296, grad_norm=37.162, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.809e-04, train_time=1.337 +[gpub011:0/16] 2024-02-06 15:11:15,950 (trainer:762) INFO: 31epoch:train:8301-8400batch: iter_time=8.995e-05, forward_time=0.379, loss_ctc=44.589, loss_att=35.748, acc=0.783, loss=38.400, backward_time=0.360, grad_norm=37.522, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.809e-04, train_time=1.709 +[gpub011:0/16] 2024-02-06 15:13:40,202 (trainer:762) INFO: 31epoch:train:8401-8500batch: iter_time=9.662e-05, forward_time=0.303, loss_ctc=42.205, loss_att=39.786, acc=0.752, loss=40.512, backward_time=0.311, grad_norm=38.023, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.809e-04, train_time=1.442 +[gpub011:0/16] 2024-02-06 15:15:30,478 (trainer:762) INFO: 31epoch:train:8501-8600batch: iter_time=8.336e-05, forward_time=0.294, loss_ctc=50.606, loss_att=57.536, acc=0.741, loss=55.457, backward_time=0.300, grad_norm=44.162, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.809e-04, train_time=1.103 +[gpub011:0/16] 2024-02-06 15:18:15,411 (trainer:762) INFO: 31epoch:train:8601-8700batch: iter_time=9.547e-05, forward_time=0.390, loss_ctc=52.891, loss_att=54.050, acc=0.734, loss=53.702, backward_time=0.320, grad_norm=46.962, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.112, optim0_lr0=1.809e-04, train_time=1.648 +[gpub011:0/16] 2024-02-06 15:19:45,787 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub011:0/16] 2024-02-06 15:20:05,345 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 15:20:08,889 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 15:20:08,889 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-06 15:20:08,892 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 15:27:07,751 (trainer:762) INFO: 31epoch:train:8701-8800batch: iter_time=3.914, forward_time=0.308, loss_ctc=41.543, loss_att=38.213, acc=0.752, loss=39.212, backward_time=0.309, grad_norm=40.001, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.808e-04, train_time=5.324 +[gpub011:0/16] 2024-02-06 15:29:33,560 (trainer:762) INFO: 31epoch:train:8801-8900batch: iter_time=7.808e-05, forward_time=0.373, loss_ctc=48.594, loss_att=44.519, acc=0.742, loss=45.741, backward_time=0.320, grad_norm=44.603, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.808e-04, train_time=1.458 +[gpub011:0/16] 2024-02-06 15:31:53,091 (trainer:762) INFO: 31epoch:train:8901-9000batch: iter_time=7.423e-05, forward_time=0.297, loss_ctc=46.095, loss_att=43.671, acc=0.751, loss=44.398, backward_time=0.302, grad_norm=40.648, clip=100.000, loss_scale=4.621e+33, optim_step_time=0.094, optim0_lr0=1.808e-04, train_time=1.395 +[gpub011:0/16] 2024-02-06 15:34:02,671 (trainer:762) INFO: 
31epoch:train:9001-9100batch: iter_time=7.512e-05, forward_time=0.296, loss_ctc=39.881, loss_att=38.515, acc=0.762, loss=38.925, backward_time=0.312, grad_norm=34.566, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.808e-04, train_time=1.296 +[gpub011:0/16] 2024-02-06 15:37:01,380 (trainer:762) INFO: 31epoch:train:9101-9200batch: iter_time=7.622e-05, forward_time=0.429, loss_ctc=54.340, loss_att=52.175, acc=0.755, loss=52.825, backward_time=0.306, grad_norm=44.780, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.808e-04, train_time=1.787 +[gpub011:0/16] 2024-02-06 15:39:14,897 (trainer:762) INFO: 31epoch:train:9201-9300batch: iter_time=7.750e-05, forward_time=0.302, loss_ctc=51.852, loss_att=50.024, acc=0.753, loss=50.573, backward_time=0.305, grad_norm=43.442, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.807e-04, train_time=1.335 +[gpub011:0/16] 2024-02-06 15:41:23,935 (trainer:762) INFO: 31epoch:train:9301-9400batch: iter_time=8.044e-05, forward_time=0.307, loss_ctc=47.996, loss_att=46.607, acc=0.752, loss=47.024, backward_time=0.312, grad_norm=44.689, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.807e-04, train_time=1.291 +[gpub011:0/16] 2024-02-06 15:44:28,358 (trainer:762) INFO: 31epoch:train:9401-9500batch: iter_time=8.033e-05, forward_time=0.405, loss_ctc=48.776, loss_att=39.537, acc=0.766, loss=42.309, backward_time=0.327, grad_norm=40.773, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.807e-04, train_time=1.844 +[gpub011:0/16] 2024-02-06 15:46:16,613 (trainer:762) INFO: 31epoch:train:9501-9600batch: iter_time=7.722e-05, forward_time=0.289, loss_ctc=44.769, loss_att=34.822, acc=0.781, loss=37.806, backward_time=0.298, grad_norm=40.220, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.807e-04, train_time=1.082 +[gpub011:0/16] 2024-02-06 15:48:54,750 (trainer:762) INFO: 31epoch:train:9601-9700batch: 
iter_time=8.306e-05, forward_time=0.299, loss_ctc=42.014, loss_att=39.680, acc=0.759, loss=40.380, backward_time=0.302, grad_norm=38.994, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.807e-04, train_time=1.582 +[gpub011:0/16] 2024-02-06 15:51:23,478 (trainer:762) INFO: 31epoch:train:9701-9800batch: iter_time=7.727e-05, forward_time=0.450, loss_ctc=44.030, loss_att=44.890, acc=0.761, loss=44.632, backward_time=0.321, grad_norm=60.957, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.806e-04, train_time=1.487 +[gpub011:0/16] 2024-02-06 15:53:49,120 (trainer:762) INFO: 31epoch:train:9801-9900batch: iter_time=7.866e-05, forward_time=0.303, loss_ctc=48.261, loss_att=52.665, acc=0.739, loss=51.344, backward_time=0.318, grad_norm=42.724, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.806e-04, train_time=1.456 +[gpub011:0/16] 2024-02-06 15:56:10,553 (trainer:762) INFO: 31epoch:train:9901-10000batch: iter_time=7.787e-05, forward_time=0.383, loss_ctc=56.070, loss_att=53.009, acc=0.738, loss=53.927, backward_time=0.311, grad_norm=50.004, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.806e-04, train_time=1.415 +[gpub011:0/16] 2024-02-06 15:56:30,619 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub011:0/16] 2024-02-06 15:56:50,505 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 15:56:54,418 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 15:56:54,418 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-06 15:56:54,421 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 16:05:05,592 (trainer:762) INFO: 31epoch:train:10001-10100batch: iter_time=3.941, forward_time=0.318, loss_ctc=37.474, loss_att=36.318, acc=0.759, loss=36.665, backward_time=0.298, grad_norm=39.228, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.806e-04, train_time=5.350 +[gpub011:0/16] 2024-02-06 16:07:15,249 (trainer:762) INFO: 31epoch:train:10101-10200batch: iter_time=7.837e-05, forward_time=0.315, loss_ctc=49.156, loss_att=44.238, acc=0.765, loss=45.714, backward_time=0.310, grad_norm=38.964, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.806e-04, train_time=1.296 +[gpub011:0/16] 2024-02-06 16:09:45,899 (trainer:762) INFO: 31epoch:train:10201-10300batch: iter_time=7.945e-05, forward_time=0.338, loss_ctc=46.314, loss_att=49.313, acc=0.750, loss=48.413, backward_time=0.332, grad_norm=42.181, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.805e-04, train_time=1.507 +[gpub011:0/16] 2024-02-06 16:12:18,336 (trainer:762) 
INFO: 31epoch:train:10301-10400batch: iter_time=7.918e-05, forward_time=0.322, loss_ctc=48.740, loss_att=47.681, acc=0.752, loss=47.999, backward_time=0.301, grad_norm=42.646, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.805e-04, train_time=1.524 +[gpub011:0/16] 2024-02-06 16:15:15,046 (trainer:762) INFO: 31epoch:train:10401-10500batch: iter_time=8.072e-05, forward_time=0.292, loss_ctc=48.685, loss_att=54.432, acc=0.754, loss=52.708, backward_time=0.298, grad_norm=41.036, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.805e-04, train_time=1.766 +[gpub011:0/16] 2024-02-06 16:17:19,892 (trainer:762) INFO: 31epoch:train:10501-10600batch: iter_time=7.822e-05, forward_time=0.317, loss_ctc=46.032, loss_att=43.645, acc=0.772, loss=44.361, backward_time=0.311, grad_norm=41.413, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.805e-04, train_time=1.248 +[gpub011:0/16] 2024-02-06 16:20:27,999 (trainer:762) INFO: 31epoch:train:10601-10700batch: iter_time=8.115e-05, forward_time=0.406, loss_ctc=53.545, loss_att=46.093, acc=0.763, loss=48.328, backward_time=0.320, grad_norm=47.932, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.109, optim0_lr0=1.805e-04, train_time=1.882 +[gpub011:0/16] 2024-02-06 16:22:21,916 (trainer:762) INFO: 31epoch:train:10701-10800batch: iter_time=7.915e-05, forward_time=0.290, loss_ctc=42.567, loss_att=35.785, acc=0.772, loss=37.820, backward_time=0.295, grad_norm=38.163, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.805e-04, train_time=1.139 +[gpub011:0/16] 2024-02-06 16:25:21,759 (trainer:762) INFO: 31epoch:train:10801-10900batch: iter_time=8.213e-05, forward_time=0.287, loss_ctc=44.388, loss_att=35.776, acc=0.787, loss=38.360, backward_time=0.295, grad_norm=36.196, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.804e-04, train_time=1.798 +[gpub011:0/16] 2024-02-06 16:27:24,164 (trainer:762) INFO: 
31epoch:train:10901-11000batch: iter_time=7.907e-05, forward_time=0.304, loss_ctc=41.680, loss_att=39.018, acc=0.762, loss=39.816, backward_time=0.304, grad_norm=34.690, clip=100.000, loss_scale=9.242e+33, optim_step_time=0.107, optim0_lr0=1.804e-04, train_time=1.224 +[gpub011:0/16] 2024-02-06 16:29:48,484 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 16:30:27,366 (trainer:762) INFO: 31epoch:train:11001-11100batch: iter_time=8.107e-05, forward_time=0.426, loss_ctc=50.937, loss_att=61.007, acc=0.743, loss=57.986, backward_time=0.315, grad_norm=43.610, clip=100.000, loss_scale=8.811e+33, optim_step_time=0.099, optim0_lr0=1.804e-04, train_time=1.833 +[gpub011:0/16] 2024-02-06 16:32:26,913 (trainer:762) INFO: 31epoch:train:11101-11200batch: iter_time=8.035e-05, forward_time=0.292, loss_ctc=51.643, loss_att=52.659, acc=0.744, loss=52.354, backward_time=0.298, grad_norm=47.045, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.804e-04, train_time=1.195 +[gpub011:0/16] 2024-02-06 16:34:37,474 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-06 16:34:56,638 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 16:35:00,206 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 16:35:00,206 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-06 16:35:00,209 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 16:41:40,281 (trainer:762) INFO: 31epoch:train:11201-11300batch: iter_time=3.621, forward_time=0.443, loss_ctc=41.059, loss_att=38.098, acc=0.762, loss=38.986, backward_time=0.323, grad_norm=35.831, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.804e-04, train_time=5.533 +[gpub011:0/16] 2024-02-06 16:43:52,095 (trainer:762) INFO: 31epoch:train:11301-11400batch: iter_time=7.922e-05, forward_time=0.291, loss_ctc=48.862, loss_att=44.171, acc=0.757, loss=45.579, backward_time=0.297, grad_norm=42.544, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.803e-04, train_time=1.318 +[gpub011:0/16] 2024-02-06 16:46:27,476 (trainer:762) INFO: 31epoch:train:11401-11500batch: iter_time=8.118e-05, forward_time=0.290, loss_ctc=45.959, loss_att=44.366, acc=0.756, loss=44.844, backward_time=0.297, grad_norm=38.631, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.803e-04, train_time=1.554 +[gpub011:0/16] 2024-02-06 16:48:42,867 (trainer:762) 
INFO: 31epoch:train:11501-11600batch: iter_time=8.776e-05, forward_time=0.400, loss_ctc=39.464, loss_att=38.811, acc=0.765, loss=39.007, backward_time=0.346, grad_norm=34.912, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.803e-04, train_time=1.354 +[gpub011:0/16] 2024-02-06 16:51:10,130 (trainer:762) INFO: 31epoch:train:11601-11700batch: iter_time=8.501e-05, forward_time=0.294, loss_ctc=53.457, loss_att=52.506, acc=0.763, loss=52.791, backward_time=0.303, grad_norm=42.866, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.803e-04, train_time=1.472 +[gpub011:0/16] 2024-02-06 16:52:32,690 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 16:53:28,830 (trainer:762) INFO: 31epoch:train:11701-11800batch: iter_time=8.747e-05, forward_time=0.289, loss_ctc=51.444, loss_att=52.578, acc=0.757, loss=52.238, backward_time=0.298, grad_norm=44.896, clip=100.000, loss_scale=3.934e+33, optim_step_time=0.094, optim0_lr0=1.803e-04, train_time=1.387 +[gpub011:0/16] 2024-02-06 16:55:46,535 (trainer:762) INFO: 31epoch:train:11801-11900batch: iter_time=3.422e-04, forward_time=0.290, loss_ctc=48.625, loss_att=49.225, acc=0.757, loss=49.045, backward_time=0.307, grad_norm=45.978, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.802e-04, train_time=1.377 +[gpub011:0/16] 2024-02-06 16:58:39,548 (trainer:762) INFO: 31epoch:train:11901-12000batch: iter_time=8.225e-05, forward_time=0.347, loss_ctc=48.233, loss_att=39.692, acc=0.771, loss=42.254, backward_time=0.390, grad_norm=35.732, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.802e-04, train_time=1.730 +[gpub011:0/16] 2024-02-06 17:01:20,902 (trainer:762) INFO: 31epoch:train:12001-12100batch: iter_time=8.478e-05, forward_time=0.289, loss_ctc=43.866, loss_att=34.143, acc=0.789, loss=37.060, backward_time=0.294, grad_norm=37.938, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, 
optim0_lr0=1.802e-04, train_time=1.613 +[gpub011:0/16] 2024-02-06 17:03:21,914 (trainer:762) INFO: 31epoch:train:12101-12200batch: iter_time=8.656e-05, forward_time=0.288, loss_ctc=41.873, loss_att=39.654, acc=0.764, loss=40.320, backward_time=0.295, grad_norm=36.446, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.802e-04, train_time=1.210 +[gpub011:0/16] 2024-02-06 17:06:24,573 (trainer:762) INFO: 31epoch:train:12201-12300batch: iter_time=0.001, forward_time=0.477, loss_ctc=43.613, loss_att=46.247, acc=0.764, loss=45.457, backward_time=0.334, grad_norm=36.697, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.802e-04, train_time=1.826 +[gpub011:0/16] 2024-02-06 17:08:33,055 (trainer:762) INFO: 31epoch:train:12301-12400batch: iter_time=7.972e-05, forward_time=0.290, loss_ctc=48.066, loss_att=54.544, acc=0.744, loss=52.601, backward_time=0.298, grad_norm=43.881, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.801e-04, train_time=1.285 +[gpub011:0/16] 2024-02-06 17:11:12,135 (trainer:762) INFO: 31epoch:train:12401-12500batch: iter_time=7.497e-05, forward_time=0.290, loss_ctc=55.323, loss_att=53.435, acc=0.743, loss=54.001, backward_time=0.296, grad_norm=50.556, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.801e-04, train_time=1.591 +[gpub011:0/16] 2024-02-06 17:11:32,188 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-06 17:11:51,634 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 17:11:55,212 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 17:11:55,212 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-06 17:11:55,215 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 17:20:52,304 (trainer:762) INFO: 31epoch:train:12501-12600batch: iter_time=4.174, forward_time=0.394, loss_ctc=37.331, loss_att=35.063, acc=0.763, loss=35.744, backward_time=0.307, grad_norm=37.789, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.801e-04, train_time=5.801 +[gpub011:0/16] 2024-02-06 17:23:25,242 (trainer:762) INFO: 31epoch:train:12601-12700batch: iter_time=7.864e-05, forward_time=0.290, loss_ctc=48.813, loss_att=42.415, acc=0.770, loss=44.335, backward_time=0.296, grad_norm=38.881, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.801e-04, train_time=1.529 +[gpub011:0/16] 2024-02-06 17:25:43,312 (trainer:762) INFO: 31epoch:train:12701-12800batch: iter_time=8.519e-05, forward_time=0.291, loss_ctc=46.329, loss_att=48.176, acc=0.754, loss=47.622, backward_time=0.295, grad_norm=44.921, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.801e-04, train_time=1.381 +[gpub011:0/16] 2024-02-06 17:28:54,776 (trainer:762) 
INFO: 31epoch:train:12801-12900batch: iter_time=8.261e-05, forward_time=0.391, loss_ctc=48.403, loss_att=46.964, acc=0.754, loss=47.396, backward_time=0.375, grad_norm=44.107, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.800e-04, train_time=1.914 +[gpub011:0/16] 2024-02-06 17:30:57,483 (trainer:762) INFO: 31epoch:train:12901-13000batch: iter_time=7.811e-05, forward_time=0.292, loss_ctc=49.575, loss_att=54.532, acc=0.754, loss=53.045, backward_time=0.299, grad_norm=41.761, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.800e-04, train_time=1.227 +[gpub011:0/16] 2024-02-06 17:33:52,466 (trainer:762) INFO: 31epoch:train:13001-13100batch: iter_time=8.452e-05, forward_time=0.289, loss_ctc=45.592, loss_att=42.731, acc=0.775, loss=43.589, backward_time=0.293, grad_norm=39.218, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.800e-04, train_time=1.749 +[gpub011:0/16] 2024-02-06 17:36:26,185 (trainer:762) INFO: 31epoch:train:13101-13200batch: iter_time=8.194e-05, forward_time=0.375, loss_ctc=52.527, loss_att=45.585, acc=0.766, loss=47.667, backward_time=0.408, grad_norm=47.782, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.800e-04, train_time=1.537 +[gpub011:0/16] 2024-02-06 17:38:42,627 (trainer:762) INFO: 31epoch:train:13201-13300batch: iter_time=8.081e-05, forward_time=0.290, loss_ctc=42.124, loss_att=35.855, acc=0.772, loss=37.736, backward_time=0.292, grad_norm=36.090, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.800e-04, train_time=1.364 +[gpub011:0/16] 2024-02-06 17:41:29,144 (trainer:762) INFO: 31epoch:train:13301-13400batch: iter_time=8.164e-05, forward_time=0.288, loss_ctc=44.160, loss_att=35.353, acc=0.789, loss=37.995, backward_time=0.294, grad_norm=37.146, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.799e-04, train_time=1.665 +[gpub011:0/16] 2024-02-06 17:44:13,894 (trainer:762) INFO: 
31epoch:train:13401-13500batch: iter_time=8.361e-05, forward_time=0.314, loss_ctc=41.819, loss_att=39.629, acc=0.759, loss=40.286, backward_time=0.295, grad_norm=36.210, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.106, optim0_lr0=1.799e-04, train_time=1.647 +[gpub011:0/16] 2024-02-06 17:47:05,859 (trainer:762) INFO: 31epoch:train:13501-13600batch: iter_time=8.422e-05, forward_time=0.393, loss_ctc=50.150, loss_att=60.519, acc=0.747, loss=57.408, backward_time=0.321, grad_norm=42.918, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.108, optim0_lr0=1.799e-04, train_time=1.720 +[gpub011:0/16] 2024-02-06 17:49:18,939 (trainer:762) INFO: 31epoch:train:13601-13700batch: iter_time=8.395e-05, forward_time=0.288, loss_ctc=53.127, loss_att=54.149, acc=0.740, loss=53.842, backward_time=0.297, grad_norm=48.860, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.799e-04, train_time=1.331 +[gpub011:0/16] 2024-02-06 17:51:13,580 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-06 17:51:32,847 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 17:51:36,411 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 17:51:36,411 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-06 17:51:36,414 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 17:59:14,476 (trainer:762) INFO: 31epoch:train:13701-13800batch: iter_time=4.268, forward_time=0.369, loss_ctc=40.957, loss_att=38.576, acc=0.760, loss=39.290, backward_time=0.307, grad_norm=38.654, clip=100.000, loss_scale=3.842e+33, optim_step_time=0.097, optim0_lr0=1.799e-04, train_time=5.955 +[gpub011:0/16] 2024-02-06 18:01:14,336 (trainer:762) INFO: 31epoch:train:13801-13900batch: iter_time=7.756e-05, forward_time=0.290, loss_ctc=48.145, loss_att=43.950, acc=0.756, loss=45.208, backward_time=0.296, grad_norm=40.097, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.798e-04, train_time=1.199 +[gpub011:0/16] 2024-02-06 18:04:03,616 (trainer:762) INFO: 31epoch:train:13901-14000batch: iter_time=8.131e-05, forward_time=0.420, loss_ctc=46.080, loss_att=43.832, acc=0.759, loss=44.506, backward_time=0.344, grad_norm=38.946, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.798e-04, train_time=1.692 +[gpub011:0/16] 2024-02-06 18:06:31,331 (trainer:762) 
INFO: 31epoch:train:14001-14100batch: iter_time=8.231e-05, forward_time=0.286, loss_ctc=38.914, loss_att=38.715, acc=0.765, loss=38.774, backward_time=0.293, grad_norm=34.935, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.798e-04, train_time=1.477 +[gpub011:0/16] 2024-02-06 18:09:15,427 (trainer:762) INFO: 31epoch:train:14101-14200batch: iter_time=8.510e-05, forward_time=0.303, loss_ctc=54.533, loss_att=51.539, acc=0.765, loss=52.437, backward_time=0.308, grad_norm=44.515, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.798e-04, train_time=1.641 +[gpub011:0/16] 2024-02-06 18:11:56,233 (trainer:762) INFO: 31epoch:train:14201-14300batch: iter_time=8.564e-05, forward_time=0.386, loss_ctc=51.433, loss_att=52.546, acc=0.756, loss=52.212, backward_time=0.328, grad_norm=44.077, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.798e-04, train_time=1.608 +[gpub011:0/16] 2024-02-06 18:14:03,744 (trainer:762) INFO: 31epoch:train:14301-14400batch: iter_time=8.274e-05, forward_time=0.327, loss_ctc=47.353, loss_att=48.365, acc=0.760, loss=48.062, backward_time=0.296, grad_norm=46.993, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.798e-04, train_time=1.275 +[gpub011:0/16] 2024-02-06 18:16:52,190 (trainer:762) INFO: 31epoch:train:14401-14500batch: iter_time=6.913e-04, forward_time=0.382, loss_ctc=48.119, loss_att=39.459, acc=0.773, loss=42.057, backward_time=0.375, grad_norm=36.978, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.797e-04, train_time=1.683 +[gpub011:0/16] 2024-02-06 18:19:00,837 (trainer:762) INFO: 31epoch:train:14501-14600batch: iter_time=8.356e-05, forward_time=0.287, loss_ctc=44.410, loss_att=34.952, acc=0.786, loss=37.790, backward_time=0.296, grad_norm=37.982, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.797e-04, train_time=1.287 +[gpub011:0/16] 2024-02-06 18:22:10,119 (trainer:762) INFO: 
31epoch:train:14601-14700batch: iter_time=8.434e-05, forward_time=0.348, loss_ctc=41.539, loss_att=39.605, acc=0.765, loss=40.185, backward_time=0.307, grad_norm=38.979, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.109, optim0_lr0=1.797e-04, train_time=1.892 +[gpub011:0/16] 2024-02-06 18:24:15,539 (trainer:762) INFO: 31epoch:train:14701-14800batch: iter_time=8.403e-05, forward_time=0.368, loss_ctc=44.159, loss_att=46.400, acc=0.765, loss=45.728, backward_time=0.315, grad_norm=35.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.797e-04, train_time=1.253 +[gpub011:0/16] 2024-02-06 18:26:54,677 (trainer:762) INFO: 31epoch:train:14801-14900batch: iter_time=8.462e-05, forward_time=0.297, loss_ctc=48.223, loss_att=55.037, acc=0.742, loss=52.993, backward_time=0.298, grad_norm=42.199, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.797e-04, train_time=1.592 +[gpub011:0/16] 2024-02-06 18:29:55,561 (trainer:762) INFO: 31epoch:train:14901-15000batch: iter_time=2.028e-04, forward_time=0.387, loss_ctc=54.818, loss_att=53.669, acc=0.738, loss=54.014, backward_time=0.326, grad_norm=52.056, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.115, optim0_lr0=1.796e-04, train_time=1.809 +[gpub011:0/16] 2024-02-06 19:09:35,929 (trainer:361) INFO: 31epoch results: [train] iter_time=0.308, forward_time=0.336, loss_ctc=47.479, loss_att=45.237, acc=0.756, loss=45.909, backward_time=0.315, grad_norm=41.613, clip=100.000, loss_scale=4.286e+33, optim_step_time=0.099, optim0_lr0=1.811e-04, train_time=1.786, time=7 hours, 26 minutes and 59.31 seconds, total_count=495000, gpu_max_cached_mem_GB=41.025, [valid] loss_ctc=35.901, cer_ctc=0.186, loss_att=38.143, acc=0.685, cer=0.310, wer=0.994, loss=37.471, time=39 minutes and 16.21 seconds, total_count=154143, gpu_max_cached_mem_GB=41.025 +[gpub011:0/16] 2024-02-06 19:09:46,610 (trainer:416) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub011:0/16] 2024-02-06 
19:09:46,668 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/26epoch.pth +[gpub011:0/16] 2024-02-06 19:09:46,669 (trainer:290) INFO: 32/45epoch started. Estimated time to finish: 4 days, 14 hours and 42 minutes +[gpub011:0/16] 2024-02-06 19:09:47,029 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-06 19:10:05,676 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 19:10:09,368 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 19:10:09,368 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-06 19:10:09,371 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 19:18:21,946 (trainer:762) INFO: 32epoch:train:1-100batch: iter_time=3.796, forward_time=0.376, loss_ctc=49.884, loss_att=50.368, acc=0.749, loss=50.223, backward_time=0.313, grad_norm=44.868, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.796e-04, train_time=5.152 +[gpub011:0/16] 2024-02-06 19:20:38,214 (trainer:762) INFO: 32epoch:train:101-200batch: iter_time=8.590e-05, forward_time=0.297, loss_ctc=49.821, loss_att=47.669, acc=0.754, loss=48.314, backward_time=0.296, grad_norm=40.496, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.796e-04, 
train_time=1.362 +[gpub011:0/16] 2024-02-06 19:23:19,071 (trainer:762) INFO: 32epoch:train:201-300batch: iter_time=9.055e-05, forward_time=0.313, loss_ctc=47.292, loss_att=46.896, acc=0.761, loss=47.015, backward_time=0.315, grad_norm=38.185, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.796e-04, train_time=1.608 +[gpub011:0/16] 2024-02-06 19:26:20,097 (trainer:762) INFO: 32epoch:train:301-400batch: iter_time=8.651e-04, forward_time=0.342, loss_ctc=53.747, loss_att=51.500, acc=0.747, loss=52.174, backward_time=0.325, grad_norm=41.829, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.796e-04, train_time=1.811 +[gpub011:0/16] 2024-02-06 19:28:59,114 (trainer:762) INFO: 32epoch:train:401-500batch: iter_time=8.694e-05, forward_time=0.302, loss_ctc=44.197, loss_att=37.928, acc=0.773, loss=39.809, backward_time=0.295, grad_norm=38.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.795e-04, train_time=1.590 +[gpub011:0/16] 2024-02-06 19:31:24,598 (trainer:762) INFO: 32epoch:train:501-600batch: iter_time=9.343e-05, forward_time=0.320, loss_ctc=41.490, loss_att=42.534, acc=0.754, loss=42.220, backward_time=0.308, grad_norm=36.079, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.795e-04, train_time=1.455 +[gpub011:0/16] 2024-02-06 19:34:08,852 (trainer:762) INFO: 32epoch:train:601-700batch: iter_time=8.838e-05, forward_time=0.364, loss_ctc=45.483, loss_att=40.809, acc=0.762, loss=42.211, backward_time=0.302, grad_norm=38.422, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.114, optim0_lr0=1.795e-04, train_time=1.642 +[gpub011:0/16] 2024-02-06 19:36:47,869 (trainer:762) INFO: 32epoch:train:701-800batch: iter_time=8.196e-05, forward_time=0.292, loss_ctc=49.949, loss_att=49.371, acc=0.738, loss=49.544, backward_time=0.297, grad_norm=40.883, clip=100.000, loss_scale=7.685e+33, optim_step_time=0.096, optim0_lr0=1.795e-04, train_time=1.591 +[gpub011:0/16] 2024-02-06 
19:39:15,899 (trainer:762) INFO: 32epoch:train:801-900batch: iter_time=8.714e-05, forward_time=0.299, loss_ctc=51.910, loss_att=55.740, acc=0.743, loss=54.591, backward_time=0.305, grad_norm=43.679, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.795e-04, train_time=1.480 +[gpub011:0/16] 2024-02-06 19:41:38,397 (trainer:762) INFO: 32epoch:train:901-1000batch: iter_time=2.462e-04, forward_time=0.381, loss_ctc=45.897, loss_att=41.778, acc=0.769, loss=43.014, backward_time=0.317, grad_norm=39.065, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=1.794e-04, train_time=1.425 +[gpub011:0/16] 2024-02-06 19:42:58,349 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 19:44:03,157 (trainer:762) INFO: 32epoch:train:1001-1100batch: iter_time=8.070e-05, forward_time=0.304, loss_ctc=63.488, loss_att=58.213, acc=0.749, loss=59.796, backward_time=0.302, grad_norm=51.783, clip=100.000, loss_scale=8.654e+33, optim_step_time=0.094, optim0_lr0=1.794e-04, train_time=1.447 +[gpub011:0/16] 2024-02-06 19:46:03,678 (trainer:762) INFO: 32epoch:train:1101-1200batch: iter_time=8.311e-05, forward_time=0.295, loss_ctc=51.985, loss_att=50.339, acc=0.754, loss=50.833, backward_time=0.309, grad_norm=42.794, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.794e-04, train_time=1.205 +[gpub011:0/16] 2024-02-06 19:47:31,962 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-06 19:47:51,325 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 19:47:54,867 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 19:47:54,867 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-06 19:47:54,870 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 19:55:33,140 (trainer:762) INFO: 32epoch:train:1201-1300batch: iter_time=4.326, forward_time=0.412, loss_ctc=46.589, loss_att=48.471, acc=0.752, loss=47.906, backward_time=0.331, grad_norm=44.689, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.794e-04, train_time=5.694 +[gpub011:0/16] 2024-02-06 19:58:15,092 (trainer:762) INFO: 32epoch:train:1301-1400batch: iter_time=7.867e-05, forward_time=0.310, loss_ctc=46.543, loss_att=42.785, acc=0.747, loss=43.913, backward_time=0.292, grad_norm=43.301, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.794e-04, train_time=1.620 +[gpub011:0/16] 2024-02-06 20:00:17,543 (trainer:762) INFO: 32epoch:train:1401-1500batch: iter_time=7.933e-05, forward_time=0.307, loss_ctc=49.399, loss_att=47.288, acc=0.752, loss=47.921, backward_time=0.305, grad_norm=37.528, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.793e-04, train_time=1.224 +[gpub011:0/16] 2024-02-06 20:02:38,941 (trainer:762) INFO: 
32epoch:train:1501-1600batch: iter_time=3.920e-04, forward_time=0.419, loss_ctc=49.877, loss_att=45.972, acc=0.764, loss=47.144, backward_time=0.329, grad_norm=38.448, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.119, optim0_lr0=1.793e-04, train_time=1.414 +[gpub011:0/16] 2024-02-06 20:05:22,313 (trainer:762) INFO: 32epoch:train:1601-1700batch: iter_time=8.411e-05, forward_time=0.290, loss_ctc=50.215, loss_att=47.601, acc=0.749, loss=48.385, backward_time=0.294, grad_norm=43.040, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.793e-04, train_time=1.633 +[gpub011:0/16] 2024-02-06 20:07:23,090 (trainer:762) INFO: 32epoch:train:1701-1800batch: iter_time=8.679e-05, forward_time=0.311, loss_ctc=45.350, loss_att=43.639, acc=0.753, loss=44.152, backward_time=0.314, grad_norm=37.680, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.793e-04, train_time=1.208 +[gpub011:0/16] 2024-02-06 20:10:24,471 (trainer:762) INFO: 32epoch:train:1801-1900batch: iter_time=6.792e-04, forward_time=0.344, loss_ctc=40.134, loss_att=37.349, acc=0.762, loss=38.185, backward_time=0.361, grad_norm=33.421, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.793e-04, train_time=1.813 +[gpub011:0/16] 2024-02-06 20:12:27,717 (trainer:762) INFO: 32epoch:train:1901-2000batch: iter_time=8.365e-05, forward_time=0.290, loss_ctc=42.591, loss_att=40.754, acc=0.758, loss=41.305, backward_time=0.295, grad_norm=36.185, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.792e-04, train_time=1.232 +[gpub011:0/16] 2024-02-06 20:14:37,486 (trainer:762) INFO: 32epoch:train:2001-2100batch: iter_time=8.204e-05, forward_time=0.325, loss_ctc=51.255, loss_att=53.734, acc=0.715, loss=52.991, backward_time=0.306, grad_norm=43.533, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.792e-04, train_time=1.297 +[gpub011:0/16] 2024-02-06 20:17:30,620 (trainer:762) INFO: 32epoch:train:2101-2200batch: 
iter_time=7.892e-04, forward_time=0.364, loss_ctc=48.989, loss_att=49.637, acc=0.745, loss=49.443, backward_time=0.340, grad_norm=43.720, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.792e-04, train_time=1.732 +[gpub011:0/16] 2024-02-06 20:19:37,125 (trainer:762) INFO: 32epoch:train:2201-2300batch: iter_time=8.516e-05, forward_time=0.290, loss_ctc=52.785, loss_att=49.601, acc=0.754, loss=50.556, backward_time=0.298, grad_norm=45.902, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.792e-04, train_time=1.265 +[gpub011:0/16] 2024-02-06 20:21:41,237 (trainer:762) INFO: 32epoch:train:2301-2400batch: iter_time=8.510e-05, forward_time=0.301, loss_ctc=54.484, loss_att=53.593, acc=0.740, loss=53.860, backward_time=0.303, grad_norm=48.293, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.792e-04, train_time=1.241 +[gpub011:0/16] 2024-02-06 20:24:22,759 (trainer:762) INFO: 32epoch:train:2401-2500batch: iter_time=8.674e-05, forward_time=0.335, loss_ctc=50.887, loss_att=46.640, acc=0.758, loss=47.914, backward_time=0.302, grad_norm=45.872, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.792e-04, train_time=1.615 +[gpub011:0/16] 2024-02-06 20:24:42,912 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-06 20:25:02,205 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 20:25:05,770 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 20:25:05,770 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-06 20:25:05,773 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 20:33:24,423 (trainer:762) INFO: 32epoch:train:2501-2600batch: iter_time=4.150, forward_time=0.413, loss_ctc=48.413, loss_att=48.367, acc=0.746, loss=48.381, backward_time=0.319, grad_norm=42.568, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.791e-04, train_time=5.416 +[gpub011:0/16] 2024-02-06 20:36:00,894 (trainer:762) INFO: 32epoch:train:2601-2700batch: iter_time=7.957e-05, forward_time=0.294, loss_ctc=47.905, loss_att=44.892, acc=0.755, loss=45.796, backward_time=0.302, grad_norm=39.509, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.791e-04, train_time=1.565 +[gpub011:0/16] 2024-02-06 20:38:11,202 (trainer:762) INFO: 32epoch:train:2701-2800batch: iter_time=8.305e-05, forward_time=0.311, loss_ctc=46.743, loss_att=45.988, acc=0.764, loss=46.215, backward_time=0.304, grad_norm=39.005, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.791e-04, train_time=1.302 +[gpub011:0/16] 2024-02-06 20:40:50,448 (trainer:762) INFO: 
32epoch:train:2801-2900batch: iter_time=3.501e-04, forward_time=0.414, loss_ctc=52.134, loss_att=48.296, acc=0.749, loss=49.448, backward_time=0.320, grad_norm=39.805, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.131, optim0_lr0=1.791e-04, train_time=1.593 +[gpub011:0/16] 2024-02-06 20:43:15,485 (trainer:762) INFO: 32epoch:train:2901-3000batch: iter_time=7.935e-05, forward_time=0.288, loss_ctc=43.883, loss_att=37.822, acc=0.775, loss=39.640, backward_time=0.292, grad_norm=39.305, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.791e-04, train_time=1.450 +[gpub011:0/16] 2024-02-06 20:44:40,683 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-06 20:45:08,497 (trainer:762) INFO: 32epoch:train:3001-3100batch: iter_time=7.956e-05, forward_time=0.330, loss_ctc=40.764, loss_att=38.256, acc=0.759, loss=39.008, backward_time=0.306, grad_norm=33.469, clip=100.000, loss_scale=5.507e+33, optim_step_time=0.096, optim0_lr0=1.790e-04, train_time=1.130 +[gpub011:0/16] 2024-02-06 20:47:47,651 (trainer:762) INFO: 32epoch:train:3101-3200batch: iter_time=0.001, forward_time=0.408, loss_ctc=44.041, loss_att=39.919, acc=0.761, loss=41.156, backward_time=0.319, grad_norm=37.244, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.114, optim0_lr0=1.790e-04, train_time=1.590 +[gpub011:0/16] 2024-02-06 20:50:26,445 (trainer:762) INFO: 32epoch:train:3201-3300batch: iter_time=8.802e-05, forward_time=0.289, loss_ctc=48.064, loss_att=48.566, acc=0.739, loss=48.415, backward_time=0.293, grad_norm=40.738, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.790e-04, train_time=1.589 +[gpub011:0/16] 2024-02-06 20:52:19,756 (trainer:762) INFO: 32epoch:train:3301-3400batch: iter_time=8.152e-05, forward_time=0.314, loss_ctc=51.455, loss_att=55.149, acc=0.731, loss=54.041, backward_time=0.315, grad_norm=46.545, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.790e-04, 
train_time=1.133 +[gpub011:0/16] 2024-02-06 20:55:05,692 (trainer:762) INFO: 32epoch:train:3401-3500batch: iter_time=8.661e-05, forward_time=0.287, loss_ctc=44.812, loss_att=42.127, acc=0.758, loss=42.933, backward_time=0.293, grad_norm=38.426, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.790e-04, train_time=1.659 +[gpub011:0/16] 2024-02-06 20:57:36,019 (trainer:762) INFO: 32epoch:train:3501-3600batch: iter_time=8.157e-05, forward_time=0.394, loss_ctc=60.644, loss_att=56.643, acc=0.745, loss=57.843, backward_time=0.325, grad_norm=54.947, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.128, optim0_lr0=1.789e-04, train_time=1.501 +[gpub011:0/16] 2024-02-06 20:59:37,730 (trainer:762) INFO: 32epoch:train:3601-3700batch: iter_time=8.246e-05, forward_time=0.318, loss_ctc=50.945, loss_att=50.024, acc=0.748, loss=50.300, backward_time=0.312, grad_norm=44.651, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.789e-04, train_time=1.219 +[gpub011:0/16] 2024-02-06 21:01:13,057 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub011:0/16] 2024-02-06 21:01:32,217 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 21:01:35,817 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 21:01:35,817 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-06 21:01:35,821 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 21:09:15,397 (trainer:762) INFO: 32epoch:train:3701-3800batch: iter_time=4.342, forward_time=0.396, loss_ctc=44.854, loss_att=47.334, acc=0.757, loss=46.590, backward_time=0.314, grad_norm=43.870, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.789e-04, train_time=5.776 +[gpub011:0/16] 2024-02-06 21:11:20,690 (trainer:762) INFO: 32epoch:train:3801-3900batch: iter_time=8.188e-05, forward_time=0.304, loss_ctc=45.461, loss_att=44.996, acc=0.757, loss=45.136, backward_time=0.297, grad_norm=42.437, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.789e-04, train_time=1.253 +[gpub011:0/16] 2024-02-06 21:13:13,344 (trainer:762) INFO: 32epoch:train:3901-4000batch: iter_time=8.659e-05, forward_time=0.311, loss_ctc=49.167, loss_att=47.128, acc=0.756, loss=47.740, backward_time=0.306, grad_norm=38.264, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.789e-04, train_time=1.126 +[gpub011:0/16] 2024-02-06 21:16:23,270 (trainer:762) INFO: 
32epoch:train:4001-4100batch: iter_time=9.391e-05, forward_time=0.383, loss_ctc=48.375, loss_att=45.745, acc=0.776, loss=46.534, backward_time=0.351, grad_norm=35.324, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.788e-04, train_time=1.899 +[gpub011:0/16] 2024-02-06 21:18:29,461 (trainer:762) INFO: 32epoch:train:4101-4200batch: iter_time=8.724e-05, forward_time=0.298, loss_ctc=49.804, loss_att=47.038, acc=0.755, loss=47.868, backward_time=0.297, grad_norm=40.536, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.788e-04, train_time=1.262 +[gpub011:0/16] 2024-02-06 21:20:30,749 (trainer:762) INFO: 32epoch:train:4201-4300batch: iter_time=9.302e-05, forward_time=0.309, loss_ctc=44.503, loss_att=46.447, acc=0.759, loss=45.864, backward_time=0.306, grad_norm=37.668, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.788e-04, train_time=1.213 +[gpub011:0/16] 2024-02-06 21:23:20,263 (trainer:762) INFO: 32epoch:train:4301-4400batch: iter_time=8.885e-05, forward_time=0.397, loss_ctc=40.782, loss_att=37.228, acc=0.766, loss=38.294, backward_time=0.343, grad_norm=33.961, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.788e-04, train_time=1.695 +[gpub011:0/16] 2024-02-06 21:25:21,689 (trainer:762) INFO: 32epoch:train:4401-4500batch: iter_time=8.893e-05, forward_time=0.289, loss_ctc=42.036, loss_att=39.865, acc=0.766, loss=40.517, backward_time=0.296, grad_norm=38.447, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.788e-04, train_time=1.214 +[gpub011:0/16] 2024-02-06 21:27:21,692 (trainer:762) INFO: 32epoch:train:4501-4600batch: iter_time=7.935e-05, forward_time=0.323, loss_ctc=50.364, loss_att=53.029, acc=0.731, loss=52.230, backward_time=0.309, grad_norm=43.313, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.788e-04, train_time=1.200 +[gpub011:0/16] 2024-02-06 21:30:27,721 (trainer:762) INFO: 32epoch:train:4601-4700batch: 
iter_time=9.050e-05, forward_time=0.398, loss_ctc=48.307, loss_att=48.560, acc=0.764, loss=48.484, backward_time=0.359, grad_norm=41.622, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.787e-04, train_time=1.860 +[gpub011:0/16] 2024-02-06 21:32:34,971 (trainer:762) INFO: 32epoch:train:4701-4800batch: iter_time=8.003e-05, forward_time=0.290, loss_ctc=51.707, loss_att=48.127, acc=0.769, loss=49.201, backward_time=0.298, grad_norm=42.790, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.787e-04, train_time=1.272 +[gpub011:0/16] 2024-02-06 21:34:36,173 (trainer:762) INFO: 32epoch:train:4801-4900batch: iter_time=8.802e-05, forward_time=0.327, loss_ctc=52.630, loss_att=52.780, acc=0.752, loss=52.735, backward_time=0.312, grad_norm=48.452, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.787e-04, train_time=1.212 +[gpub011:0/16] 2024-02-06 21:37:44,641 (trainer:762) INFO: 32epoch:train:4901-5000batch: iter_time=0.001, forward_time=0.363, loss_ctc=49.733, loss_att=47.832, acc=0.764, loss=48.403, backward_time=0.322, grad_norm=46.340, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.787e-04, train_time=1.883 +[gpub011:0/16] 2024-02-06 21:38:04,960 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub011:0/16] 2024-02-06 21:38:24,484 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 21:38:28,362 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 21:38:28,362 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-06 21:38:28,366 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 21:46:38,121 (trainer:762) INFO: 32epoch:train:5001-5100batch: iter_time=3.974, forward_time=0.365, loss_ctc=46.896, loss_att=48.617, acc=0.747, loss=48.100, backward_time=0.305, grad_norm=41.715, clip=100.000, loss_scale=6.542e+33, optim_step_time=0.095, optim0_lr0=1.787e-04, train_time=5.336 +[gpub011:0/16] 2024-02-06 21:48:30,443 (trainer:762) INFO: 32epoch:train:5101-5200batch: iter_time=8.489e-05, forward_time=0.311, loss_ctc=47.870, loss_att=45.003, acc=0.755, loss=45.863, backward_time=0.308, grad_norm=37.684, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.786e-04, train_time=1.123 +[gpub011:0/16] 2024-02-06 21:51:06,233 (trainer:762) INFO: 32epoch:train:5201-5300batch: iter_time=8.465e-05, forward_time=0.296, loss_ctc=46.676, loss_att=46.249, acc=0.765, loss=46.377, backward_time=0.300, grad_norm=38.189, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.786e-04, train_time=1.558 +[gpub011:0/16] 2024-02-06 21:53:21,515 (trainer:762) INFO: 
32epoch:train:5301-5400batch: iter_time=2.050e-04, forward_time=0.374, loss_ctc=51.817, loss_att=48.300, acc=0.751, loss=49.355, backward_time=0.327, grad_norm=41.035, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.109, optim0_lr0=1.786e-04, train_time=1.351 +[gpub011:0/16] 2024-02-06 21:55:24,357 (trainer:762) INFO: 32epoch:train:5401-5500batch: iter_time=8.978e-05, forward_time=0.295, loss_ctc=43.380, loss_att=37.448, acc=0.777, loss=39.227, backward_time=0.303, grad_norm=37.706, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.786e-04, train_time=1.229 +[gpub011:0/16] 2024-02-06 21:58:06,197 (trainer:762) INFO: 32epoch:train:5501-5600batch: iter_time=9.109e-05, forward_time=0.300, loss_ctc=40.708, loss_att=38.555, acc=0.758, loss=39.200, backward_time=0.302, grad_norm=34.854, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.786e-04, train_time=1.618 +[gpub011:0/16] 2024-02-06 21:59:15,377 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-06 22:00:20,942 (trainer:762) INFO: 32epoch:train:5601-5700batch: iter_time=4.495e-04, forward_time=0.386, loss_ctc=43.983, loss_att=40.187, acc=0.761, loss=41.326, backward_time=0.336, grad_norm=36.627, clip=100.000, loss_scale=8.129e+33, optim_step_time=0.110, optim0_lr0=1.785e-04, train_time=1.347 +[gpub011:0/16] 2024-02-06 22:02:25,557 (trainer:762) INFO: 32epoch:train:5701-5800batch: iter_time=8.275e-05, forward_time=0.288, loss_ctc=47.836, loss_att=48.612, acc=0.741, loss=48.380, backward_time=0.295, grad_norm=41.263, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.785e-04, train_time=1.244 +[gpub011:0/16] 2024-02-06 22:05:18,564 (trainer:762) INFO: 32epoch:train:5801-5900batch: iter_time=9.428e-05, forward_time=0.313, loss_ctc=51.063, loss_att=55.278, acc=0.732, loss=54.014, backward_time=0.308, grad_norm=46.409, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.785e-04, train_time=1.732 +[gpub011:0/16] 2024-02-06 22:07:19,048 (trainer:762) INFO: 32epoch:train:5901-6000batch: iter_time=9.203e-05, forward_time=0.352, loss_ctc=44.892, loss_att=42.073, acc=0.760, loss=42.919, backward_time=0.319, grad_norm=38.393, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.785e-04, train_time=1.204 +[gpub011:0/16] 2024-02-06 22:09:52,194 (trainer:762) INFO: 32epoch:train:6001-6100batch: iter_time=9.652e-05, forward_time=0.366, loss_ctc=59.138, loss_att=56.629, acc=0.747, loss=57.382, backward_time=0.313, grad_norm=54.568, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.785e-04, train_time=1.531 +[gpub011:0/16] 2024-02-06 22:11:57,316 (trainer:762) INFO: 32epoch:train:6101-6200batch: iter_time=9.279e-05, forward_time=0.315, loss_ctc=50.497, loss_att=49.318, acc=0.750, loss=49.672, backward_time=0.302, grad_norm=43.969, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.784e-04, train_time=1.250 +[gpub011:0/16] 2024-02-06 22:13:32,965 
(multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub011:0/16] 2024-02-06 22:13:52,297 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 22:13:55,891 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 22:13:55,891 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-06 22:13:55,895 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 22:20:46,057 (trainer:762) INFO: 32epoch:train:6201-6300batch: iter_time=3.805, forward_time=0.393, loss_ctc=44.242, loss_att=46.853, acc=0.759, loss=46.070, backward_time=0.320, grad_norm=42.930, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.784e-04, train_time=5.288 +[gpub011:0/16] 2024-02-06 22:22:45,031 (trainer:762) INFO: 32epoch:train:6301-6400batch: iter_time=7.836e-05, forward_time=0.291, loss_ctc=44.522, loss_att=43.914, acc=0.758, loss=44.096, backward_time=0.296, grad_norm=39.614, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.784e-04, train_time=1.190 +[gpub011:0/16] 2024-02-06 22:25:02,337 (trainer:762) INFO: 32epoch:train:6401-6500batch: iter_time=8.271e-05, forward_time=0.310, loss_ctc=49.345, loss_att=47.801, acc=0.756, loss=48.264, backward_time=0.315, grad_norm=40.131, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.784e-04, 
train_time=1.372 +[gpub011:0/16] 2024-02-06 22:27:48,695 (trainer:762) INFO: 32epoch:train:6501-6600batch: iter_time=8.308e-05, forward_time=0.407, loss_ctc=48.797, loss_att=46.661, acc=0.775, loss=47.301, backward_time=0.348, grad_norm=37.848, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.784e-04, train_time=1.664 +[gpub011:0/16] 2024-02-06 22:29:51,877 (trainer:762) INFO: 32epoch:train:6601-6700batch: iter_time=7.562e-05, forward_time=0.291, loss_ctc=49.280, loss_att=46.598, acc=0.757, loss=47.403, backward_time=0.298, grad_norm=42.543, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.784e-04, train_time=1.231 +[gpub011:0/16] 2024-02-06 22:32:27,477 (trainer:762) INFO: 32epoch:train:6701-6800batch: iter_time=7.626e-05, forward_time=0.316, loss_ctc=44.446, loss_att=46.100, acc=0.761, loss=45.604, backward_time=0.304, grad_norm=35.950, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.783e-04, train_time=1.555 +[gpub011:0/16] 2024-02-06 22:34:54,663 (trainer:762) INFO: 32epoch:train:6801-6900batch: iter_time=4.672e-04, forward_time=0.423, loss_ctc=39.945, loss_att=36.884, acc=0.768, loss=37.803, backward_time=0.333, grad_norm=33.523, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.783e-04, train_time=1.472 +[gpub011:0/16] 2024-02-06 22:36:43,199 (trainer:762) INFO: 32epoch:train:6901-7000batch: iter_time=8.291e-05, forward_time=0.290, loss_ctc=41.702, loss_att=39.654, acc=0.768, loss=40.268, backward_time=0.297, grad_norm=37.658, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.783e-04, train_time=1.086 +[gpub011:0/16] 2024-02-06 22:39:16,743 (trainer:762) INFO: 32epoch:train:7001-7100batch: iter_time=8.225e-05, forward_time=0.318, loss_ctc=50.046, loss_att=53.355, acc=0.732, loss=52.362, backward_time=0.306, grad_norm=42.547, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.783e-04, train_time=1.535 +[gpub011:0/16] 2024-02-06 
22:41:44,728 (trainer:762) INFO: 32epoch:train:7101-7200batch: iter_time=2.829e-04, forward_time=0.353, loss_ctc=48.326, loss_att=48.577, acc=0.764, loss=48.502, backward_time=0.304, grad_norm=39.598, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.783e-04, train_time=1.479 +[gpub011:0/16] 2024-02-06 22:43:43,538 (trainer:762) INFO: 32epoch:train:7201-7300batch: iter_time=8.573e-05, forward_time=0.348, loss_ctc=51.697, loss_att=47.829, acc=0.770, loss=48.990, backward_time=0.323, grad_norm=47.678, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.782e-04, train_time=1.188 +[gpub011:0/16] 2024-02-06 22:46:17,419 (trainer:762) INFO: 32epoch:train:7301-7400batch: iter_time=8.873e-05, forward_time=0.310, loss_ctc=52.474, loss_att=53.287, acc=0.750, loss=53.043, backward_time=0.311, grad_norm=51.372, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.782e-04, train_time=1.539 +[gpub011:0/16] 2024-02-06 22:48:15,661 (trainer:762) INFO: 32epoch:train:7401-7500batch: iter_time=8.260e-05, forward_time=0.292, loss_ctc=49.393, loss_att=47.719, acc=0.763, loss=48.222, backward_time=0.298, grad_norm=45.667, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.782e-04, train_time=1.182 +[gpub011:0/16] 2024-02-06 22:48:35,763 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-06 22:48:54,944 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 22:48:58,501 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 22:48:58,501 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-06 22:48:58,505 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 22:57:27,501 (trainer:762) INFO: 32epoch:train:7501-7600batch: iter_time=4.174, forward_time=0.416, loss_ctc=47.123, loss_att=48.158, acc=0.757, loss=47.847, backward_time=0.319, grad_norm=43.930, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.782e-04, train_time=5.519 +[gpub011:0/16] 2024-02-06 22:59:51,722 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-06 22:59:56,509 (trainer:762) INFO: 32epoch:train:7601-7700batch: iter_time=8.489e-05, forward_time=0.328, loss_ctc=47.372, loss_att=46.244, acc=0.761, loss=46.582, backward_time=0.299, grad_norm=37.364, clip=100.000, loss_scale=7.185e+33, optim_step_time=0.095, optim0_lr0=1.782e-04, train_time=1.490 +[gpub011:0/16] 2024-02-06 23:02:33,837 (trainer:762) INFO: 32epoch:train:7701-7800batch: iter_time=8.315e-05, forward_time=0.426, loss_ctc=46.100, loss_att=45.955, acc=0.769, loss=45.998, backward_time=0.328, grad_norm=36.900, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=1.781e-04, train_time=1.573 +[gpub011:0/16] 2024-02-06 23:05:21,064 (trainer:762) INFO: 32epoch:train:7801-7900batch: iter_time=9.875e-05, forward_time=0.290, loss_ctc=51.453, loss_att=50.392, acc=0.753, loss=50.710, backward_time=0.297, grad_norm=40.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.781e-04, train_time=1.672 +[gpub011:0/16] 2024-02-06 23:07:37,242 (trainer:762) INFO: 32epoch:train:7901-8000batch: iter_time=1.008e-04, forward_time=0.361, loss_ctc=43.288, loss_att=37.282, acc=0.780, loss=39.084, backward_time=0.334, grad_norm=38.024, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.781e-04, train_time=1.362 +[gpub011:0/16] 2024-02-06 23:10:00,440 (trainer:762) INFO: 32epoch:train:8001-8100batch: iter_time=8.520e-05, forward_time=0.343, loss_ctc=40.620, loss_att=41.499, acc=0.760, loss=41.235, backward_time=0.313, grad_norm=34.539, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.781e-04, train_time=1.432 +[gpub011:0/16] 2024-02-06 23:12:24,690 (trainer:762) INFO: 32epoch:train:8101-8200batch: iter_time=8.369e-05, forward_time=0.288, loss_ctc=43.455, loss_att=39.728, acc=0.767, loss=40.846, backward_time=0.295, grad_norm=36.253, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.781e-04, train_time=1.442 +[gpub011:0/16] 2024-02-06 23:15:19,177 
(trainer:762) INFO: 32epoch:train:8201-8300batch: iter_time=8.771e-05, forward_time=0.480, loss_ctc=47.638, loss_att=48.818, acc=0.744, loss=48.464, backward_time=0.324, grad_norm=39.813, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.781e-04, train_time=1.745 +[gpub011:0/16] 2024-02-06 23:17:33,995 (trainer:762) INFO: 32epoch:train:8301-8400batch: iter_time=8.210e-05, forward_time=0.293, loss_ctc=50.958, loss_att=55.356, acc=0.748, loss=54.037, backward_time=0.299, grad_norm=44.286, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.780e-04, train_time=1.348 +[gpub011:0/16] 2024-02-06 23:20:10,119 (trainer:762) INFO: 32epoch:train:8401-8500batch: iter_time=8.892e-05, forward_time=0.288, loss_ctc=44.355, loss_att=41.185, acc=0.775, loss=42.136, backward_time=0.295, grad_norm=38.672, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.780e-04, train_time=1.561 +[gpub011:0/16] 2024-02-06 23:22:49,917 (trainer:762) INFO: 32epoch:train:8501-8600batch: iter_time=0.007, forward_time=0.438, loss_ctc=57.611, loss_att=56.446, acc=0.755, loss=56.795, backward_time=0.333, grad_norm=53.687, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.112, optim0_lr0=1.780e-04, train_time=1.598 +[gpub011:0/16] 2024-02-06 23:25:09,228 (trainer:762) INFO: 32epoch:train:8601-8700batch: iter_time=8.330e-05, forward_time=0.288, loss_ctc=50.042, loss_att=49.681, acc=0.759, loss=49.790, backward_time=0.297, grad_norm=42.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.780e-04, train_time=1.393 +[gpub011:0/16] 2024-02-06 23:26:46,882 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub011:0/16] 2024-02-06 23:27:06,434 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-06 23:27:10,032 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-06 23:27:10,032 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-06 23:27:10,092 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-06 23:34:31,336 (trainer:762) INFO: 32epoch:train:8701-8800batch: iter_time=4.058, forward_time=0.400, loss_ctc=43.871, loss_att=47.380, acc=0.757, loss=46.327, backward_time=0.317, grad_norm=41.694, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.780e-04, train_time=5.621 +[gpub011:0/16] 2024-02-06 23:36:50,544 (trainer:762) INFO: 32epoch:train:8801-8900batch: iter_time=7.128e-05, forward_time=0.288, loss_ctc=44.617, loss_att=41.116, acc=0.754, loss=42.166, backward_time=0.294, grad_norm=39.132, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.779e-04, train_time=1.392 +[gpub011:0/16] 2024-02-06 23:39:20,716 (trainer:762) INFO: 32epoch:train:8901-9000batch: iter_time=7.888e-05, forward_time=0.290, loss_ctc=48.679, loss_att=46.691, acc=0.756, loss=47.287, backward_time=0.295, grad_norm=38.250, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.779e-04, train_time=1.501 +[gpub011:0/16] 2024-02-06 23:42:06,189 (trainer:762) INFO: 
32epoch:train:9001-9100batch: iter_time=7.792e-05, forward_time=0.395, loss_ctc=48.533, loss_att=44.724, acc=0.773, loss=45.866, backward_time=0.353, grad_norm=37.733, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.779e-04, train_time=1.655 +[gpub011:0/16] 2024-02-06 23:44:18,634 (trainer:762) INFO: 32epoch:train:9101-9200batch: iter_time=8.019e-05, forward_time=0.291, loss_ctc=48.901, loss_att=46.467, acc=0.755, loss=47.197, backward_time=0.298, grad_norm=44.229, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.779e-04, train_time=1.324 +[gpub011:0/16] 2024-02-06 23:46:30,935 (trainer:762) INFO: 32epoch:train:9201-9300batch: iter_time=8.896e-05, forward_time=0.291, loss_ctc=44.851, loss_att=42.776, acc=0.759, loss=43.398, backward_time=0.296, grad_norm=36.771, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.779e-04, train_time=1.323 +[gpub011:0/16] 2024-02-06 23:49:30,644 (trainer:762) INFO: 32epoch:train:9301-9400batch: iter_time=9.353e-05, forward_time=0.387, loss_ctc=39.453, loss_att=36.672, acc=0.767, loss=37.506, backward_time=0.330, grad_norm=34.426, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.124, optim0_lr0=1.778e-04, train_time=1.797 +[gpub011:0/16] 2024-02-06 23:51:47,883 (trainer:762) INFO: 32epoch:train:9401-9500batch: iter_time=8.513e-05, forward_time=0.289, loss_ctc=41.518, loss_att=40.198, acc=0.763, loss=40.594, backward_time=0.296, grad_norm=49.644, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.778e-04, train_time=1.372 +[gpub011:0/16] 2024-02-06 23:54:01,449 (trainer:762) INFO: 32epoch:train:9501-9600batch: iter_time=8.983e-05, forward_time=0.289, loss_ctc=50.200, loss_att=53.217, acc=0.720, loss=52.312, backward_time=0.296, grad_norm=42.879, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.778e-04, train_time=1.335 +[gpub011:0/16] 2024-02-06 23:56:44,642 (trainer:762) INFO: 32epoch:train:9601-9700batch: 
iter_time=9.458e-05, forward_time=0.353, loss_ctc=48.082, loss_att=48.523, acc=0.750, loss=48.390, backward_time=0.331, grad_norm=40.858, clip=100.000, loss_scale=5.400e+33, optim_step_time=0.106, optim0_lr0=1.778e-04, train_time=1.632 +[gpub011:0/16] 2024-02-06 23:59:05,873 (trainer:762) INFO: 32epoch:train:9701-9800batch: iter_time=8.444e-05, forward_time=0.316, loss_ctc=52.478, loss_att=48.840, acc=0.757, loss=49.932, backward_time=0.326, grad_norm=44.351, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.778e-04, train_time=1.412 +[gpub011:0/16] 2024-02-07 00:01:13,189 (trainer:762) INFO: 32epoch:train:9801-9900batch: iter_time=8.076e-05, forward_time=0.291, loss_ctc=51.652, loss_att=53.245, acc=0.745, loss=52.767, backward_time=0.296, grad_norm=49.721, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.778e-04, train_time=1.273 +[gpub011:0/16] 2024-02-07 00:03:42,639 (trainer:762) INFO: 32epoch:train:9901-10000batch: iter_time=8.277e-05, forward_time=0.304, loss_ctc=49.049, loss_att=45.406, acc=0.763, loss=46.499, backward_time=0.312, grad_norm=45.332, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.777e-04, train_time=1.494 +[gpub011:0/16] 2024-02-07 00:04:02,772 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub011:0/16] 2024-02-07 00:04:21,924 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 00:04:25,496 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 00:04:25,496 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-07 00:04:25,499 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 00:13:53,571 (trainer:762) INFO: 32epoch:train:10001-10100batch: iter_time=4.185, forward_time=0.455, loss_ctc=46.464, loss_att=49.103, acc=0.757, loss=48.311, backward_time=0.328, grad_norm=42.274, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.777e-04, train_time=6.110 +[gpub011:0/16] 2024-02-07 00:16:44,246 (trainer:762) INFO: 32epoch:train:10101-10200batch: iter_time=8.569e-05, forward_time=0.290, loss_ctc=46.880, loss_att=46.030, acc=0.762, loss=46.285, backward_time=0.295, grad_norm=40.640, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.777e-04, train_time=1.706 +[gpub011:0/16] 2024-02-07 00:19:35,570 (trainer:762) INFO: 32epoch:train:10201-10300batch: iter_time=8.956e-05, forward_time=0.292, loss_ctc=46.086, loss_att=46.269, acc=0.769, loss=46.214, backward_time=0.298, grad_norm=37.001, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.777e-04, train_time=1.713 +[gpub011:0/16] 2024-02-07 00:22:33,468 (trainer:762) 
INFO: 32epoch:train:10301-10400batch: iter_time=9.497e-05, forward_time=0.504, loss_ctc=51.425, loss_att=50.263, acc=0.754, loss=50.612, backward_time=0.339, grad_norm=41.020, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=1.777e-04, train_time=1.778 +[gpub011:0/16] 2024-02-07 00:25:41,551 (trainer:762) INFO: 32epoch:train:10401-10500batch: iter_time=9.862e-05, forward_time=0.287, loss_ctc=43.053, loss_att=37.257, acc=0.779, loss=38.996, backward_time=0.292, grad_norm=37.019, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.776e-04, train_time=1.881 +[gpub011:0/16] 2024-02-07 00:27:58,448 (trainer:762) INFO: 32epoch:train:10501-10600batch: iter_time=9.735e-05, forward_time=0.288, loss_ctc=40.102, loss_att=41.379, acc=0.761, loss=40.996, backward_time=0.304, grad_norm=34.512, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.776e-04, train_time=1.368 +[gpub011:0/16] 2024-02-07 00:30:55,426 (trainer:762) INFO: 32epoch:train:10601-10700batch: iter_time=9.176e-05, forward_time=0.362, loss_ctc=43.687, loss_att=39.894, acc=0.768, loss=41.032, backward_time=0.389, grad_norm=34.826, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.776e-04, train_time=1.770 +[gpub011:0/16] 2024-02-07 00:33:40,019 (trainer:762) INFO: 32epoch:train:10701-10800batch: iter_time=9.083e-05, forward_time=0.289, loss_ctc=47.178, loss_att=48.321, acc=0.746, loss=47.978, backward_time=0.295, grad_norm=39.689, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.776e-04, train_time=1.646 +[gpub011:0/16] 2024-02-07 00:36:46,658 (trainer:762) INFO: 32epoch:train:10801-10900batch: iter_time=9.418e-05, forward_time=0.373, loss_ctc=50.616, loss_att=55.304, acc=0.748, loss=53.898, backward_time=0.480, grad_norm=44.333, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.776e-04, train_time=1.866 +[gpub011:0/16] 2024-02-07 00:39:48,438 (trainer:762) INFO: 
32epoch:train:10901-11000batch: iter_time=8.062e-05, forward_time=0.290, loss_ctc=44.315, loss_att=40.722, acc=0.778, loss=41.800, backward_time=0.292, grad_norm=37.601, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.775e-04, train_time=1.818 +[gpub011:0/16] 2024-02-07 00:42:28,885 (trainer:762) INFO: 32epoch:train:11001-11100batch: iter_time=9.044e-05, forward_time=0.293, loss_ctc=57.783, loss_att=56.202, acc=0.756, loss=56.676, backward_time=0.299, grad_norm=55.729, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.775e-04, train_time=1.604 +[gpub011:0/16] 2024-02-07 00:45:16,046 (trainer:762) INFO: 32epoch:train:11101-11200batch: iter_time=4.002e-04, forward_time=0.422, loss_ctc=50.049, loss_att=50.002, acc=0.757, loss=50.016, backward_time=0.320, grad_norm=42.951, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.121, optim0_lr0=1.775e-04, train_time=1.671 +[gpub011:0/16] 2024-02-07 00:47:18,784 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-07 00:47:38,168 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 00:47:41,798 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 00:47:41,799 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-07 00:47:41,802 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 00:55:24,482 (trainer:762) INFO: 32epoch:train:11201-11300batch: iter_time=4.302, forward_time=0.435, loss_ctc=44.036, loss_att=46.879, acc=0.762, loss=46.026, backward_time=0.324, grad_norm=41.802, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.775e-04, train_time=6.085 +[gpub011:0/16] 2024-02-07 00:57:50,762 (trainer:762) INFO: 32epoch:train:11301-11400batch: iter_time=8.486e-05, forward_time=0.288, loss_ctc=44.898, loss_att=43.150, acc=0.762, loss=43.675, backward_time=0.294, grad_norm=40.471, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.775e-04, train_time=1.463 +[gpub011:0/16] 2024-02-07 01:00:04,740 (trainer:762) INFO: 32epoch:train:11401-11500batch: iter_time=8.010e-05, forward_time=0.290, loss_ctc=48.358, loss_att=46.149, acc=0.759, loss=46.812, backward_time=0.297, grad_norm=37.311, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.775e-04, train_time=1.340 +[gpub011:0/16] 2024-02-07 01:03:06,191 
(trainer:762) INFO: 32epoch:train:11501-11600batch: iter_time=8.636e-05, forward_time=0.455, loss_ctc=48.483, loss_att=45.709, acc=0.778, loss=46.541, backward_time=0.336, grad_norm=37.217, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=1.774e-04, train_time=1.814 +[gpub011:0/16] 2024-02-07 01:05:45,613 (trainer:762) INFO: 32epoch:train:11601-11700batch: iter_time=8.442e-05, forward_time=0.291, loss_ctc=48.644, loss_att=46.043, acc=0.758, loss=46.823, backward_time=0.296, grad_norm=40.931, clip=100.000, loss_scale=1.080e+34, optim_step_time=0.094, optim0_lr0=1.774e-04, train_time=1.594 +[gpub011:0/16] 2024-02-07 01:07:42,764 (trainer:762) INFO: 32epoch:train:11701-11800batch: iter_time=8.233e-05, forward_time=0.289, loss_ctc=44.553, loss_att=45.574, acc=0.762, loss=45.268, backward_time=0.298, grad_norm=37.144, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.094, optim0_lr0=1.774e-04, train_time=1.171 +[gpub011:0/16] 2024-02-07 01:10:28,344 (trainer:762) INFO: 32epoch:train:11801-11900batch: iter_time=8.857e-05, forward_time=0.374, loss_ctc=39.446, loss_att=36.510, acc=0.770, loss=37.391, backward_time=0.389, grad_norm=34.917, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.105, optim0_lr0=1.774e-04, train_time=1.655 +[gpub011:0/16] 2024-02-07 01:12:41,638 (trainer:762) INFO: 32epoch:train:11901-12000batch: iter_time=8.186e-05, forward_time=0.293, loss_ctc=41.093, loss_att=39.459, acc=0.769, loss=39.949, backward_time=0.295, grad_norm=36.770, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.095, optim0_lr0=1.774e-04, train_time=1.332 +[gpub011:0/16] 2024-02-07 01:15:20,936 (trainer:762) INFO: 32epoch:train:12001-12100batch: iter_time=8.386e-05, forward_time=0.289, loss_ctc=49.486, loss_att=52.730, acc=0.734, loss=51.757, backward_time=0.294, grad_norm=42.297, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.095, optim0_lr0=1.773e-04, train_time=1.594 +[gpub011:0/16] 2024-02-07 01:18:17,324 (trainer:762) INFO: 
32epoch:train:12101-12200batch: iter_time=8.502e-05, forward_time=0.350, loss_ctc=47.297, loss_att=47.919, acc=0.767, loss=47.733, backward_time=0.366, grad_norm=38.471, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.105, optim0_lr0=1.773e-04, train_time=1.764 +[gpub011:0/16] 2024-02-07 01:20:53,874 (trainer:762) INFO: 32epoch:train:12201-12300batch: iter_time=8.402e-05, forward_time=0.291, loss_ctc=51.491, loss_att=47.765, acc=0.772, loss=48.883, backward_time=0.299, grad_norm=40.947, clip=100.000, loss_scale=2.077e+34, optim_step_time=0.094, optim0_lr0=1.773e-04, train_time=1.564 +[gpub011:0/16] 2024-02-07 01:21:10,120 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-07 01:23:14,252 (trainer:762) INFO: 32epoch:train:12301-12400batch: iter_time=8.707e-05, forward_time=0.288, loss_ctc=51.392, loss_att=52.493, acc=0.753, loss=52.163, backward_time=0.297, grad_norm=46.862, clip=100.000, loss_scale=1.154e+34, optim_step_time=0.095, optim0_lr0=1.773e-04, train_time=1.404 +[gpub011:0/16] 2024-02-07 01:25:29,481 (trainer:762) INFO: 32epoch:train:12401-12500batch: iter_time=0.001, forward_time=0.414, loss_ctc=49.091, loss_att=47.047, acc=0.765, loss=47.660, backward_time=0.343, grad_norm=43.552, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=1.773e-04, train_time=1.352 +[gpub011:0/16] 2024-02-07 01:25:49,556 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub011:0/16] 2024-02-07 01:26:09,421 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 01:26:13,011 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 01:26:13,011 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-07 01:26:13,014 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 01:34:56,610 (trainer:762) INFO: 32epoch:train:12501-12600batch: iter_time=4.193, forward_time=0.310, loss_ctc=46.743, loss_att=48.778, acc=0.750, loss=48.167, backward_time=0.296, grad_norm=41.492, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.772e-04, train_time=5.671 +[gpub011:0/16] 2024-02-07 01:36:46,034 (trainer:762) INFO: 32epoch:train:12601-12700batch: iter_time=7.629e-05, forward_time=0.292, loss_ctc=46.676, loss_att=44.967, acc=0.757, loss=45.479, backward_time=0.299, grad_norm=38.495, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.772e-04, train_time=1.094 +[gpub011:0/16] 2024-02-07 01:40:11,883 (trainer:762) INFO: 32epoch:train:12701-12800batch: iter_time=8.396e-05, forward_time=0.469, loss_ctc=45.843, loss_att=46.282, acc=0.764, loss=46.150, backward_time=0.342, grad_norm=37.737, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.123, optim0_lr0=1.772e-04, train_time=2.058 +[gpub011:0/16] 2024-02-07 01:41:13,373 (trainer:693) 
WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-07 01:42:02,555 (trainer:762) INFO: 32epoch:train:12801-12900batch: iter_time=8.481e-05, forward_time=0.290, loss_ctc=51.188, loss_att=48.653, acc=0.750, loss=49.414, backward_time=0.298, grad_norm=42.773, clip=100.000, loss_scale=8.024e+33, optim_step_time=0.094, optim0_lr0=1.772e-04, train_time=1.107 +[gpub011:0/16] 2024-02-07 01:44:41,685 (trainer:762) INFO: 32epoch:train:12901-13000batch: iter_time=8.516e-05, forward_time=0.317, loss_ctc=42.631, loss_att=37.040, acc=0.780, loss=38.717, backward_time=0.292, grad_norm=36.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.772e-04, train_time=1.591 +[gpub011:0/16] 2024-02-07 01:47:00,836 (trainer:762) INFO: 32epoch:train:13001-13100batch: iter_time=8.451e-05, forward_time=0.317, loss_ctc=40.289, loss_att=38.652, acc=0.758, loss=39.143, backward_time=0.295, grad_norm=34.185, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.772e-04, train_time=1.392 +[gpub011:0/16] 2024-02-07 01:49:23,009 (trainer:762) INFO: 32epoch:train:13101-13200batch: iter_time=8.416e-05, forward_time=0.477, loss_ctc=43.557, loss_att=40.463, acc=0.762, loss=41.391, backward_time=0.337, grad_norm=37.424, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.771e-04, train_time=1.422 +[gpub011:0/16] 2024-02-07 01:52:15,222 (trainer:762) INFO: 32epoch:train:13201-13300batch: iter_time=8.139e-05, forward_time=0.291, loss_ctc=47.044, loss_att=48.783, acc=0.739, loss=48.261, backward_time=0.293, grad_norm=40.207, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.771e-04, train_time=1.722 +[gpub011:0/16] 2024-02-07 01:54:07,468 (trainer:762) INFO: 32epoch:train:13301-13400batch: iter_time=0.003, forward_time=0.317, loss_ctc=49.787, loss_att=54.632, acc=0.736, loss=53.178, backward_time=0.298, grad_norm=45.113, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, 
optim0_lr0=1.771e-04, train_time=1.122 +[gpub011:0/16] 2024-02-07 01:57:11,251 (trainer:762) INFO: 32epoch:train:13401-13500batch: iter_time=1.908e-04, forward_time=0.487, loss_ctc=43.478, loss_att=41.437, acc=0.763, loss=42.050, backward_time=0.335, grad_norm=37.793, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.771e-04, train_time=1.837 +[gpub011:0/16] 2024-02-07 01:59:37,049 (trainer:762) INFO: 32epoch:train:13501-13600batch: iter_time=8.343e-05, forward_time=0.295, loss_ctc=57.288, loss_att=56.055, acc=0.747, loss=56.425, backward_time=0.300, grad_norm=53.120, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.771e-04, train_time=1.458 +[gpub011:0/16] 2024-02-07 02:02:46,084 (trainer:762) INFO: 32epoch:train:13601-13700batch: iter_time=8.382e-05, forward_time=0.289, loss_ctc=49.822, loss_att=48.553, acc=0.756, loss=48.934, backward_time=0.293, grad_norm=41.790, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.770e-04, train_time=1.890 +[gpub011:0/16] 2024-02-07 02:04:26,381 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-07 02:04:46,102 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 02:04:49,712 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 02:04:49,713 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-07 02:04:49,716 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 02:12:08,558 (trainer:762) INFO: 32epoch:train:13701-13800batch: iter_time=4.017, forward_time=0.454, loss_ctc=43.502, loss_att=46.051, acc=0.759, loss=45.286, backward_time=0.365, grad_norm=41.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.770e-04, train_time=5.624 +[gpub011:0/16] 2024-02-07 02:14:29,269 (trainer:762) INFO: 32epoch:train:13801-13900batch: iter_time=7.444e-05, forward_time=0.286, loss_ctc=44.554, loss_att=40.434, acc=0.754, loss=41.670, backward_time=0.294, grad_norm=39.844, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.770e-04, train_time=1.407 +[gpub011:0/16] 2024-02-07 02:16:50,238 (trainer:762) INFO: 32epoch:train:13901-14000batch: iter_time=8.325e-05, forward_time=0.290, loss_ctc=48.991, loss_att=46.120, acc=0.759, loss=46.982, backward_time=0.297, grad_norm=39.046, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.770e-04, train_time=1.409 +[gpub011:0/16] 2024-02-07 02:19:53,929 
(trainer:762) INFO: 32epoch:train:14001-14100batch: iter_time=7.065e-04, forward_time=0.492, loss_ctc=48.504, loss_att=44.033, acc=0.777, loss=45.374, backward_time=0.357, grad_norm=37.615, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.770e-04, train_time=1.834 +[gpub011:0/16] 2024-02-07 02:23:12,079 (trainer:762) INFO: 32epoch:train:14101-14200batch: iter_time=8.101e-05, forward_time=0.290, loss_ctc=48.670, loss_att=45.842, acc=0.756, loss=46.690, backward_time=0.295, grad_norm=40.397, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.770e-04, train_time=1.984 +[gpub011:0/16] 2024-02-07 02:25:30,752 (trainer:762) INFO: 32epoch:train:14201-14300batch: iter_time=8.172e-05, forward_time=0.290, loss_ctc=43.942, loss_att=42.335, acc=0.760, loss=42.817, backward_time=0.296, grad_norm=36.376, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.769e-04, train_time=1.387 +[gpub011:0/16] 2024-02-07 02:28:56,239 (trainer:762) INFO: 32epoch:train:14301-14400batch: iter_time=8.816e-05, forward_time=0.476, loss_ctc=39.539, loss_att=36.432, acc=0.767, loss=37.364, backward_time=0.335, grad_norm=34.444, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.769e-04, train_time=2.054 +[gpub011:0/16] 2024-02-07 02:31:26,790 (trainer:762) INFO: 32epoch:train:14401-14500batch: iter_time=8.171e-05, forward_time=0.287, loss_ctc=40.768, loss_att=39.218, acc=0.766, loss=39.683, backward_time=0.294, grad_norm=36.546, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.769e-04, train_time=1.504 +[gpub011:0/16] 2024-02-07 02:33:47,155 (trainer:762) INFO: 32epoch:train:14501-14600batch: iter_time=8.129e-05, forward_time=0.288, loss_ctc=49.482, loss_att=52.007, acc=0.724, loss=51.250, backward_time=0.295, grad_norm=44.006, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.769e-04, train_time=1.405 +[gpub011:0/16] 2024-02-07 02:37:17,914 (trainer:762) INFO: 
32epoch:train:14601-14700batch: iter_time=2.222e-04, forward_time=0.431, loss_ctc=47.930, loss_att=48.852, acc=0.749, loss=48.576, backward_time=0.325, grad_norm=41.734, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.112, optim0_lr0=1.769e-04, train_time=2.107 +[gpub011:0/16] 2024-02-07 02:39:32,130 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-07 02:39:33,289 (trainer:762) INFO: 32epoch:train:14701-14800batch: iter_time=8.094e-05, forward_time=0.290, loss_ctc=50.739, loss_att=48.046, acc=0.759, loss=48.854, backward_time=0.298, grad_norm=42.282, clip=100.000, loss_scale=5.166e+33, optim_step_time=0.095, optim0_lr0=1.768e-04, train_time=1.354 +[gpub011:0/16] 2024-02-07 02:41:49,380 (trainer:762) INFO: 32epoch:train:14801-14900batch: iter_time=7.921e-05, forward_time=0.291, loss_ctc=51.085, loss_att=52.604, acc=0.747, loss=52.148, backward_time=0.297, grad_norm=47.546, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.768e-04, train_time=1.359 +[gpub011:0/16] 2024-02-07 02:45:24,409 (trainer:762) INFO: 32epoch:train:14901-15000batch: iter_time=1.417e-04, forward_time=0.375, loss_ctc=48.661, loss_att=46.015, acc=0.761, loss=46.809, backward_time=0.462, grad_norm=42.854, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.768e-04, train_time=2.151 +[gpub011:0/16] 2024-02-07 03:24:11,724 (trainer:361) INFO: 32epoch results: [train] iter_time=0.329, forward_time=0.337, loss_ctc=47.595, loss_att=46.402, acc=0.757, loss=46.760, backward_time=0.314, grad_norm=40.971, clip=100.000, loss_scale=7.002e+33, optim_step_time=0.099, optim0_lr0=1.782e-04, train_time=1.822, time=7 hours, 36 minutes and 2.07 seconds, total_count=510000, gpu_max_cached_mem_GB=41.025, [valid] loss_ctc=35.374, cer_ctc=0.183, loss_att=36.633, acc=0.691, cer=0.285, wer=0.991, loss=36.256, time=38 minutes and 22.69 seconds, total_count=158814, gpu_max_cached_mem_GB=41.025 +[gpub011:0/16] 2024-02-07 
03:24:22,066 (trainer:416) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub011:0/16] 2024-02-07 03:24:22,115 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/22epoch.pth, exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/27epoch.pth +[gpub011:0/16] 2024-02-07 03:24:22,115 (trainer:290) INFO: 33/45epoch started. Estimated time to finish: 4 days, 7 hours and 53 minutes +[gpub011:0/16] 2024-02-07 03:24:22,126 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-07 03:24:41,154 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 03:24:44,753 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 03:24:44,753 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-07 03:24:44,757 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 03:32:08,086 (trainer:762) INFO: 33epoch:train:1-100batch: iter_time=3.384, forward_time=0.410, loss_ctc=54.758, loss_att=52.377, acc=0.749, loss=53.091, backward_time=0.314, grad_norm=57.261, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.768e-04, train_time=4.659 +[gpub011:0/16] 2024-02-07 03:34:37,824 (trainer:762) INFO: 
33epoch:train:101-200batch: iter_time=9.064e-05, forward_time=0.292, loss_ctc=43.453, loss_att=45.231, acc=0.772, loss=44.697, backward_time=0.296, grad_norm=36.954, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.768e-04, train_time=1.497 +[gpub011:0/16] 2024-02-07 03:37:24,443 (trainer:762) INFO: 33epoch:train:201-300batch: iter_time=8.309e-05, forward_time=0.323, loss_ctc=38.588, loss_att=38.635, acc=0.758, loss=38.621, backward_time=0.303, grad_norm=34.128, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.768e-04, train_time=1.666 +[gpub011:0/16] 2024-02-07 03:39:44,321 (trainer:762) INFO: 33epoch:train:301-400batch: iter_time=8.845e-05, forward_time=0.389, loss_ctc=46.180, loss_att=44.825, acc=0.763, loss=45.231, backward_time=0.307, grad_norm=40.561, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.767e-04, train_time=1.399 +[gpub011:0/16] 2024-02-07 03:42:17,520 (trainer:762) INFO: 33epoch:train:401-500batch: iter_time=9.048e-05, forward_time=0.290, loss_ctc=44.692, loss_att=39.778, acc=0.761, loss=41.252, backward_time=0.293, grad_norm=43.749, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.767e-04, train_time=1.532 +[gpub011:0/16] 2024-02-07 03:44:24,488 (trainer:762) INFO: 33epoch:train:501-600batch: iter_time=8.376e-05, forward_time=0.303, loss_ctc=51.935, loss_att=51.103, acc=0.738, loss=51.353, backward_time=0.305, grad_norm=46.306, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.767e-04, train_time=1.269 +[gpub011:0/16] 2024-02-07 03:47:20,141 (trainer:762) INFO: 33epoch:train:601-700batch: iter_time=8.229e-05, forward_time=0.388, loss_ctc=43.189, loss_att=41.058, acc=0.768, loss=41.698, backward_time=0.341, grad_norm=35.520, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=1.767e-04, train_time=1.756 +[gpub011:0/16] 2024-02-07 03:49:22,329 (trainer:762) INFO: 33epoch:train:701-800batch: iter_time=8.435e-05, 
forward_time=0.298, loss_ctc=51.420, loss_att=50.426, acc=0.738, loss=50.724, backward_time=0.304, grad_norm=43.462, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.767e-04, train_time=1.222 +[gpub011:0/16] 2024-02-07 03:51:38,302 (trainer:762) INFO: 33epoch:train:801-900batch: iter_time=7.986e-05, forward_time=0.293, loss_ctc=43.078, loss_att=39.074, acc=0.757, loss=40.275, backward_time=0.296, grad_norm=39.667, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.766e-04, train_time=1.360 +[gpub011:0/16] 2024-02-07 03:53:48,283 (trainer:762) INFO: 33epoch:train:901-1000batch: iter_time=8.112e-05, forward_time=0.319, loss_ctc=49.383, loss_att=45.825, acc=0.759, loss=46.893, backward_time=0.305, grad_norm=47.822, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.766e-04, train_time=1.300 +[gpub011:0/16] 2024-02-07 03:56:20,920 (trainer:762) INFO: 33epoch:train:1001-1100batch: iter_time=8.014e-05, forward_time=0.331, loss_ctc=47.513, loss_att=53.905, acc=0.733, loss=51.987, backward_time=0.357, grad_norm=42.537, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.766e-04, train_time=1.525 +[gpub011:0/16] 2024-02-07 03:58:36,769 (trainer:762) INFO: 33epoch:train:1101-1200batch: iter_time=7.826e-05, forward_time=0.291, loss_ctc=43.589, loss_att=44.628, acc=0.754, loss=44.316, backward_time=0.295, grad_norm=38.426, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.766e-04, train_time=1.359 +[gpub011:0/16] 2024-02-07 03:59:59,429 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-07 04:00:18,934 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 04:00:22,788 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 04:00:22,788 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub011:0/16] 2024-02-07 04:00:22,791 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 04:06:40,462 (trainer:762) INFO: 33epoch:train:1201-1300batch: iter_time=3.467, forward_time=0.335, loss_ctc=47.919, loss_att=46.715, acc=0.767, loss=47.076, backward_time=0.309, grad_norm=38.638, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.766e-04, train_time=4.837 +[gpub011:0/16] 2024-02-07 04:09:16,610 (trainer:762) INFO: 33epoch:train:1301-1400batch: iter_time=8.603e-05, forward_time=0.338, loss_ctc=42.785, loss_att=41.247, acc=0.769, loss=41.709, backward_time=0.308, grad_norm=48.851, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.765e-04, train_time=1.561 +[gpub011:0/16] 2024-02-07 04:11:20,842 (trainer:762) INFO: 33epoch:train:1401-1500batch: iter_time=7.885e-05, forward_time=0.290, loss_ctc=44.192, loss_att=45.119, acc=0.767, loss=44.841, backward_time=0.296, grad_norm=35.573, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.765e-04, train_time=1.242 +[gpub011:0/16] 2024-02-07 04:13:45,320 (trainer:762) INFO: 
33epoch:train:1501-1600batch: iter_time=7.771e-05, forward_time=0.292, loss_ctc=42.895, loss_att=40.070, acc=0.755, loss=40.918, backward_time=0.297, grad_norm=37.156, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.765e-04, train_time=1.445 +[gpub011:0/16] 2024-02-07 04:16:02,725 (trainer:762) INFO: 33epoch:train:1601-1700batch: iter_time=8.815e-05, forward_time=0.312, loss_ctc=44.009, loss_att=43.664, acc=0.746, loss=43.767, backward_time=0.324, grad_norm=41.142, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.765e-04, train_time=1.374 +[gpub011:0/16] 2024-02-07 04:18:53,818 (trainer:762) INFO: 33epoch:train:1701-1800batch: iter_time=8.873e-05, forward_time=0.382, loss_ctc=51.410, loss_att=45.763, acc=0.755, loss=47.457, backward_time=0.303, grad_norm=42.954, clip=100.000, loss_scale=2.622e+33, optim_step_time=0.131, optim0_lr0=1.765e-04, train_time=1.711 +[gpub011:0/16] 2024-02-07 04:21:06,022 (trainer:762) INFO: 33epoch:train:1801-1900batch: iter_time=9.146e-05, forward_time=0.287, loss_ctc=45.743, loss_att=42.914, acc=0.743, loss=43.763, backward_time=0.293, grad_norm=40.099, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.765e-04, train_time=1.322 +[gpub011:0/16] 2024-02-07 04:23:26,942 (trainer:762) INFO: 33epoch:train:1901-2000batch: iter_time=8.782e-05, forward_time=0.332, loss_ctc=45.452, loss_att=47.758, acc=0.739, loss=47.066, backward_time=0.311, grad_norm=39.088, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.764e-04, train_time=1.409 +[gpub011:0/16] 2024-02-07 04:26:16,414 (trainer:762) INFO: 33epoch:train:2001-2100batch: iter_time=8.359e-05, forward_time=0.425, loss_ctc=44.169, loss_att=39.793, acc=0.757, loss=41.106, backward_time=0.302, grad_norm=38.235, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.764e-04, train_time=1.695 +[gpub011:0/16] 2024-02-07 04:28:13,563 (trainer:762) INFO: 33epoch:train:2101-2200batch: 
iter_time=8.344e-05, forward_time=0.290, loss_ctc=51.202, loss_att=50.205, acc=0.726, loss=50.504, backward_time=0.295, grad_norm=50.712, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.764e-04, train_time=1.171 +[gpub011:0/16] 2024-02-07 04:30:41,776 (trainer:762) INFO: 33epoch:train:2201-2300batch: iter_time=8.247e-05, forward_time=0.288, loss_ctc=44.153, loss_att=42.818, acc=0.761, loss=43.219, backward_time=0.295, grad_norm=37.223, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.764e-04, train_time=1.482 +[gpub011:0/16] 2024-02-07 04:33:08,235 (trainer:762) INFO: 33epoch:train:2301-2400batch: iter_time=7.642e-05, forward_time=0.325, loss_ctc=47.628, loss_att=53.193, acc=0.734, loss=51.523, backward_time=0.310, grad_norm=39.733, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.764e-04, train_time=1.464 +[gpub011:0/16] 2024-02-07 04:35:27,559 (trainer:762) INFO: 33epoch:train:2401-2500batch: iter_time=9.193e-05, forward_time=0.334, loss_ctc=42.135, loss_att=39.916, acc=0.761, loss=40.581, backward_time=0.341, grad_norm=36.842, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.763e-04, train_time=1.393 +[gpub011:0/16] 2024-02-07 04:35:47,609 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-07 04:36:06,563 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 04:36:10,142 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 04:36:10,142 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub011:0/16] 2024-02-07 04:36:10,145 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 04:44:12,790 (trainer:762) INFO: 33epoch:train:2501-2600batch: iter_time=4.039, forward_time=0.291, loss_ctc=49.418, loss_att=50.917, acc=0.754, loss=50.467, backward_time=0.296, grad_norm=52.029, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.763e-04, train_time=5.252 +[gpub011:0/16] 2024-02-07 04:47:02,741 (trainer:762) INFO: 33epoch:train:2601-2700batch: iter_time=8.202e-05, forward_time=0.293, loss_ctc=42.309, loss_att=44.118, acc=0.777, loss=43.575, backward_time=0.298, grad_norm=34.433, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.763e-04, train_time=1.699 +[gpub011:0/16] 2024-02-07 04:49:11,319 (trainer:762) INFO: 33epoch:train:2701-2800batch: iter_time=3.836e-04, forward_time=0.389, loss_ctc=38.022, loss_att=38.128, acc=0.763, loss=38.096, backward_time=0.346, grad_norm=33.455, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.763e-04, train_time=1.286 +[gpub011:0/16] 2024-02-07 04:51:49,682 (trainer:762) INFO: 
33epoch:train:2801-2900batch: iter_time=8.651e-05, forward_time=0.292, loss_ctc=44.923, loss_att=43.542, acc=0.769, loss=43.956, backward_time=0.296, grad_norm=39.462, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.763e-04, train_time=1.581 +[gpub011:0/16] 2024-02-07 04:54:07,202 (trainer:762) INFO: 33epoch:train:2901-3000batch: iter_time=8.259e-05, forward_time=0.297, loss_ctc=43.359, loss_att=38.484, acc=0.768, loss=39.946, backward_time=0.307, grad_norm=42.588, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.763e-04, train_time=1.377 +[gpub011:0/16] 2024-02-07 04:56:30,652 (trainer:762) INFO: 33epoch:train:3001-3100batch: iter_time=8.582e-05, forward_time=0.341, loss_ctc=50.629, loss_att=49.280, acc=0.742, loss=49.685, backward_time=0.307, grad_norm=44.269, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.762e-04, train_time=1.435 +[gpub011:0/16] 2024-02-07 04:58:48,398 (trainer:762) INFO: 33epoch:train:3101-3200batch: iter_time=1.209e-04, forward_time=0.376, loss_ctc=42.131, loss_att=39.985, acc=0.772, loss=40.629, backward_time=0.325, grad_norm=33.971, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.762e-04, train_time=1.377 +[gpub011:0/16] 2024-02-07 05:01:32,048 (trainer:762) INFO: 33epoch:train:3201-3300batch: iter_time=8.644e-05, forward_time=0.295, loss_ctc=49.351, loss_att=49.026, acc=0.742, loss=49.124, backward_time=0.307, grad_norm=38.322, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.762e-04, train_time=1.636 +[gpub011:0/16] 2024-02-07 05:03:51,131 (trainer:762) INFO: 33epoch:train:3301-3400batch: iter_time=8.306e-05, forward_time=0.331, loss_ctc=41.613, loss_att=39.344, acc=0.756, loss=40.025, backward_time=0.308, grad_norm=38.963, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.762e-04, train_time=1.390 +[gpub011:0/16] 2024-02-07 05:06:34,915 (trainer:762) INFO: 33epoch:train:3401-3500batch: 
iter_time=8.610e-05, forward_time=0.382, loss_ctc=48.968, loss_att=45.804, acc=0.761, loss=46.753, backward_time=0.312, grad_norm=46.152, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.762e-04, train_time=1.638 +[gpub011:0/16] 2024-02-07 05:08:29,022 (trainer:762) INFO: 33epoch:train:3501-3600batch: iter_time=8.751e-05, forward_time=0.293, loss_ctc=46.660, loss_att=53.535, acc=0.736, loss=51.472, backward_time=0.298, grad_norm=41.426, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.761e-04, train_time=1.141 +[gpub011:0/16] 2024-02-07 05:11:22,413 (trainer:762) INFO: 33epoch:train:3601-3700batch: iter_time=8.540e-05, forward_time=0.308, loss_ctc=42.695, loss_att=43.540, acc=0.759, loss=43.287, backward_time=0.317, grad_norm=37.000, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.761e-04, train_time=1.733 +[gpub011:0/16] 2024-02-07 05:12:45,341 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub011:0/16] 2024-02-07 05:13:05,075 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 05:13:08,950 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 05:13:08,951 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub011:0/16] 2024-02-07 05:13:08,954 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 05:20:26,742 
(trainer:762) INFO: 33epoch:train:3701-3800batch: iter_time=3.971, forward_time=0.314, loss_ctc=47.139, loss_att=46.255, acc=0.770, loss=46.520, backward_time=0.298, grad_norm=37.580, clip=100.000, loss_scale=5.244e+33, optim_step_time=0.095, optim0_lr0=1.761e-04, train_time=5.444 +[gpub011:0/16] 2024-02-07 05:22:51,295 (trainer:762) INFO: 33epoch:train:3801-3900batch: iter_time=7.417e-05, forward_time=0.355, loss_ctc=41.709, loss_att=41.232, acc=0.772, loss=41.375, backward_time=0.298, grad_norm=47.352, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=1.761e-04, train_time=1.445 +[gpub011:0/16] 2024-02-07 05:25:25,637 (trainer:762) INFO: 33epoch:train:3901-4000batch: iter_time=8.013e-05, forward_time=0.299, loss_ctc=43.609, loss_att=45.107, acc=0.771, loss=44.658, backward_time=0.315, grad_norm=34.993, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.761e-04, train_time=1.543 +[gpub011:0/16] 2024-02-07 05:27:34,987 (trainer:762) INFO: 33epoch:train:4001-4100batch: iter_time=2.128e-04, forward_time=0.352, loss_ctc=42.344, loss_att=40.789, acc=0.756, loss=41.256, backward_time=0.395, grad_norm=36.383, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.761e-04, train_time=1.293 +[gpub011:0/16] 2024-02-07 05:30:08,133 (trainer:762) INFO: 33epoch:train:4101-4200batch: iter_time=8.198e-05, forward_time=0.286, loss_ctc=43.301, loss_att=44.130, acc=0.748, loss=43.881, backward_time=0.295, grad_norm=42.833, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.760e-04, train_time=1.531 +[gpub011:0/16] 2024-02-07 05:32:24,538 (trainer:762) INFO: 33epoch:train:4201-4300batch: iter_time=8.341e-05, forward_time=0.288, loss_ctc=50.856, loss_att=45.235, acc=0.757, loss=46.921, backward_time=0.296, grad_norm=43.357, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.760e-04, train_time=1.364 +[gpub011:0/16] 2024-02-07 05:34:46,485 (trainer:762) INFO: 
33epoch:train:4301-4400batch: iter_time=8.563e-05, forward_time=0.329, loss_ctc=45.482, loss_att=43.088, acc=0.746, loss=43.806, backward_time=0.322, grad_norm=39.211, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.760e-04, train_time=1.419 +[gpub011:0/16] 2024-02-07 05:37:11,323 (trainer:762) INFO: 33epoch:train:4401-4500batch: iter_time=7.969e-05, forward_time=0.351, loss_ctc=45.219, loss_att=47.647, acc=0.741, loss=46.919, backward_time=0.346, grad_norm=38.857, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.760e-04, train_time=1.448 +[gpub011:0/16] 2024-02-07 05:40:00,285 (trainer:762) INFO: 33epoch:train:4501-4600batch: iter_time=8.123e-05, forward_time=0.287, loss_ctc=43.883, loss_att=39.633, acc=0.761, loss=40.908, backward_time=0.293, grad_norm=37.347, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.760e-04, train_time=1.690 +[gpub011:0/16] 2024-02-07 05:41:50,537 (trainer:762) INFO: 33epoch:train:4601-4700batch: iter_time=8.075e-05, forward_time=0.292, loss_ctc=49.736, loss_att=48.962, acc=0.730, loss=49.194, backward_time=0.297, grad_norm=50.559, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.759e-04, train_time=1.102 +[gpub011:0/16] 2024-02-07 05:44:24,377 (trainer:762) INFO: 33epoch:train:4701-4800batch: iter_time=7.816e-05, forward_time=0.322, loss_ctc=43.458, loss_att=42.098, acc=0.765, loss=42.506, backward_time=0.310, grad_norm=38.647, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.759e-04, train_time=1.538 +[gpub011:0/16] 2024-02-07 05:47:02,959 (trainer:762) INFO: 33epoch:train:4801-4900batch: iter_time=8.242e-05, forward_time=0.422, loss_ctc=47.841, loss_att=52.995, acc=0.736, loss=51.449, backward_time=0.318, grad_norm=38.490, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.759e-04, train_time=1.585 +[gpub011:0/16] 2024-02-07 05:49:18,338 (trainer:762) INFO: 33epoch:train:4901-5000batch: 
iter_time=7.864e-05, forward_time=0.286, loss_ctc=41.431, loss_att=39.158, acc=0.766, loss=39.840, backward_time=0.294, grad_norm=54.414, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.759e-04, train_time=1.354 +[gpub011:0/16] 2024-02-07 05:49:38,366 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub011:0/16] 2024-02-07 05:49:57,944 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 05:50:01,509 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 05:50:01,509 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub011:0/16] 2024-02-07 05:50:01,513 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 05:58:11,932 (trainer:762) INFO: 33epoch:train:5001-5100batch: iter_time=3.852, forward_time=0.438, loss_ctc=49.209, loss_att=48.970, acc=0.751, loss=49.041, backward_time=0.328, grad_norm=50.787, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.147, optim0_lr0=1.759e-04, train_time=5.336 +[gpub011:0/16] 2024-02-07 06:00:31,970 (trainer:762) INFO: 33epoch:train:5101-5200batch: iter_time=8.127e-05, forward_time=0.290, loss_ctc=42.094, loss_att=43.216, acc=0.779, loss=42.880, backward_time=0.296, grad_norm=34.174, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.759e-04, train_time=1.400 +[gpub011:0/16] 2024-02-07 06:02:44,479 
(trainer:762) INFO: 33epoch:train:5201-5300batch: iter_time=8.026e-05, forward_time=0.287, loss_ctc=37.523, loss_att=35.637, acc=0.768, loss=36.203, backward_time=0.294, grad_norm=33.518, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.758e-04, train_time=1.325 +[gpub011:0/16] 2024-02-07 06:03:33,289 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-07 06:05:38,877 (trainer:762) INFO: 33epoch:train:5301-5400batch: iter_time=8.468e-05, forward_time=0.429, loss_ctc=44.499, loss_att=41.894, acc=0.767, loss=42.675, backward_time=0.337, grad_norm=38.620, clip=100.000, loss_scale=6.766e+33, optim_step_time=0.101, optim0_lr0=1.758e-04, train_time=1.744 +[gpub011:0/16] 2024-02-07 06:07:32,029 (trainer:762) INFO: 33epoch:train:5401-5500batch: iter_time=8.465e-05, forward_time=0.315, loss_ctc=43.125, loss_att=38.644, acc=0.757, loss=39.988, backward_time=0.296, grad_norm=41.488, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.758e-04, train_time=1.131 +[gpub011:0/16] 2024-02-07 06:10:30,034 (trainer:762) INFO: 33epoch:train:5501-5600batch: iter_time=8.444e-05, forward_time=0.290, loss_ctc=50.085, loss_att=48.620, acc=0.742, loss=49.059, backward_time=0.295, grad_norm=45.031, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.758e-04, train_time=1.780 +[gpub011:0/16] 2024-02-07 06:12:26,402 (trainer:762) INFO: 33epoch:train:5601-5700batch: iter_time=8.689e-05, forward_time=0.289, loss_ctc=42.259, loss_att=40.003, acc=0.771, loss=40.680, backward_time=0.294, grad_norm=35.655, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.758e-04, train_time=1.163 +[gpub011:0/16] 2024-02-07 06:15:15,244 (trainer:762) INFO: 33epoch:train:5701-5800batch: iter_time=2.398e-04, forward_time=0.397, loss_ctc=49.971, loss_att=49.803, acc=0.733, loss=49.854, backward_time=0.334, grad_norm=40.083, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.112, 
optim0_lr0=1.757e-04, train_time=1.689 +[gpub011:0/16] 2024-02-07 06:17:11,319 (trainer:762) INFO: 33epoch:train:5801-5900batch: iter_time=8.305e-05, forward_time=0.290, loss_ctc=41.071, loss_att=39.221, acc=0.749, loss=39.776, backward_time=0.294, grad_norm=38.920, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.757e-04, train_time=1.161 +[gpub011:0/16] 2024-02-07 06:19:51,284 (trainer:762) INFO: 33epoch:train:5901-6000batch: iter_time=8.961e-05, forward_time=0.289, loss_ctc=48.410, loss_att=45.240, acc=0.756, loss=46.191, backward_time=0.294, grad_norm=46.846, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.757e-04, train_time=1.599 +[gpub011:0/16] 2024-02-07 06:22:01,050 (trainer:762) INFO: 33epoch:train:6001-6100batch: iter_time=2.836e-04, forward_time=0.367, loss_ctc=46.167, loss_att=51.562, acc=0.729, loss=49.943, backward_time=0.342, grad_norm=39.771, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.757e-04, train_time=1.297 +[gpub011:0/16] 2024-02-07 06:24:31,874 (trainer:762) INFO: 33epoch:train:6101-6200batch: iter_time=8.718e-05, forward_time=0.322, loss_ctc=42.730, loss_att=42.694, acc=0.757, loss=42.704, backward_time=0.312, grad_norm=36.165, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.757e-04, train_time=1.508 +[gpub011:0/16] 2024-02-07 06:25:46,174 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub011:0/16] 2024-02-07 06:26:05,492 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 06:26:09,089 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 06:26:09,089 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub011:0/16] 2024-02-07 06:26:09,093 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 06:32:37,049 (trainer:762) INFO: 33epoch:train:6201-6300batch: iter_time=3.602, forward_time=0.290, loss_ctc=46.828, loss_att=47.840, acc=0.767, loss=47.537, backward_time=0.296, grad_norm=37.915, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.757e-04, train_time=4.852 +[gpub011:0/16] 2024-02-07 06:34:56,859 (trainer:762) INFO: 33epoch:train:6301-6400batch: iter_time=3.283e-04, forward_time=0.403, loss_ctc=41.762, loss_att=41.600, acc=0.779, loss=41.649, backward_time=0.333, grad_norm=45.692, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.118, optim0_lr0=1.756e-04, train_time=1.398 +[gpub011:0/16] 2024-02-07 06:37:09,728 (trainer:762) INFO: 33epoch:train:6401-6500batch: iter_time=8.071e-05, forward_time=0.290, loss_ctc=43.078, loss_att=45.004, acc=0.776, loss=44.426, backward_time=0.296, grad_norm=35.943, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.756e-04, train_time=1.329 +[gpub011:0/16] 2024-02-07 06:39:26,232 (trainer:762) INFO: 
33epoch:train:6501-6600batch: iter_time=8.680e-05, forward_time=0.288, loss_ctc=42.213, loss_att=43.687, acc=0.754, loss=43.245, backward_time=0.298, grad_norm=37.346, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.756e-04, train_time=1.365 +[gpub011:0/16] 2024-02-07 06:42:18,813 (trainer:762) INFO: 33epoch:train:6601-6700batch: iter_time=8.490e-05, forward_time=0.364, loss_ctc=43.060, loss_att=43.107, acc=0.767, loss=43.093, backward_time=0.374, grad_norm=42.014, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.756e-04, train_time=1.725 +[gpub011:0/16] 2024-02-07 06:44:44,452 (trainer:762) INFO: 33epoch:train:6701-6800batch: iter_time=9.209e-05, forward_time=0.291, loss_ctc=50.609, loss_att=44.783, acc=0.764, loss=46.531, backward_time=0.300, grad_norm=43.141, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.756e-04, train_time=1.457 +[gpub011:0/16] 2024-02-07 06:46:57,949 (trainer:762) INFO: 33epoch:train:6801-6900batch: iter_time=8.248e-05, forward_time=0.291, loss_ctc=45.177, loss_att=43.203, acc=0.754, loss=43.795, backward_time=0.298, grad_norm=41.356, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.755e-04, train_time=1.335 +[gpub011:0/16] 2024-02-07 06:49:18,933 (trainer:762) INFO: 33epoch:train:6901-7000batch: iter_time=8.098e-05, forward_time=0.377, loss_ctc=44.956, loss_att=47.214, acc=0.749, loss=46.537, backward_time=0.354, grad_norm=38.319, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.755e-04, train_time=1.410 +[gpub011:0/16] 2024-02-07 06:51:54,387 (trainer:762) INFO: 33epoch:train:7001-7100batch: iter_time=8.769e-05, forward_time=0.289, loss_ctc=43.684, loss_att=39.718, acc=0.766, loss=40.908, backward_time=0.297, grad_norm=37.827, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.755e-04, train_time=1.554 +[gpub011:0/16] 2024-02-07 06:54:48,961 (trainer:762) INFO: 33epoch:train:7101-7200batch: 
iter_time=9.671e-04, forward_time=0.323, loss_ctc=49.118, loss_att=48.495, acc=0.741, loss=48.682, backward_time=0.328, grad_norm=51.388, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.755e-04, train_time=1.746 +[gpub011:0/16] 2024-02-07 06:56:49,452 (trainer:762) INFO: 33epoch:train:7201-7300batch: iter_time=8.372e-05, forward_time=0.372, loss_ctc=43.119, loss_att=43.940, acc=0.769, loss=43.694, backward_time=0.322, grad_norm=36.356, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.755e-04, train_time=1.205 +[gpub011:0/16] 2024-02-07 06:57:41,496 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-07 06:59:24,151 (trainer:762) INFO: 33epoch:train:7301-7400batch: iter_time=8.996e-05, forward_time=0.291, loss_ctc=46.732, loss_att=53.129, acc=0.748, loss=51.210, backward_time=0.301, grad_norm=37.671, clip=100.000, loss_scale=3.330e+33, optim_step_time=0.094, optim0_lr0=1.755e-04, train_time=1.547 +[gpub011:0/16] 2024-02-07 07:02:01,151 (trainer:762) INFO: 33epoch:train:7401-7500batch: iter_time=9.134e-05, forward_time=0.321, loss_ctc=41.397, loss_att=39.478, acc=0.766, loss=40.054, backward_time=0.317, grad_norm=35.571, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.754e-04, train_time=1.570 +[gpub011:0/16] 2024-02-07 07:02:21,180 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub011:0/16] 2024-02-07 07:02:40,743 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 07:02:44,619 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 07:02:44,619 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub011:0/16] 2024-02-07 07:02:44,623 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 07:10:58,131 (trainer:762) INFO: 33epoch:train:7501-7600batch: iter_time=3.900, forward_time=0.393, loss_ctc=48.330, loss_att=50.087, acc=0.751, loss=49.560, backward_time=0.306, grad_norm=51.360, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.754e-04, train_time=5.370 +[gpub011:0/16] 2024-02-07 07:13:11,750 (trainer:762) INFO: 33epoch:train:7601-7700batch: iter_time=8.196e-05, forward_time=0.289, loss_ctc=41.887, loss_att=43.789, acc=0.778, loss=43.219, backward_time=0.299, grad_norm=32.933, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.754e-04, train_time=1.336 +[gpub011:0/16] 2024-02-07 07:15:55,937 (trainer:762) INFO: 33epoch:train:7701-7800batch: iter_time=8.626e-05, forward_time=0.335, loss_ctc=37.443, loss_att=35.629, acc=0.768, loss=36.173, backward_time=0.302, grad_norm=33.942, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=1.754e-04, train_time=1.642 +[gpub011:0/16] 2024-02-07 07:18:31,965 (trainer:762) INFO: 
33epoch:train:7801-7900batch: iter_time=3.527e-04, forward_time=0.336, loss_ctc=44.291, loss_att=42.476, acc=0.766, loss=43.020, backward_time=0.379, grad_norm=39.947, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.754e-04, train_time=1.560 +[gpub011:0/16] 2024-02-07 07:20:34,674 (trainer:762) INFO: 33epoch:train:7901-8000batch: iter_time=8.207e-05, forward_time=0.288, loss_ctc=43.192, loss_att=39.293, acc=0.755, loss=40.463, backward_time=0.295, grad_norm=41.128, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.754e-04, train_time=1.227 +[gpub011:0/16] 2024-02-07 07:23:28,648 (trainer:762) INFO: 33epoch:train:8001-8100batch: iter_time=8.390e-05, forward_time=0.325, loss_ctc=49.714, loss_att=47.940, acc=0.745, loss=48.472, backward_time=0.322, grad_norm=43.118, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.753e-04, train_time=1.739 +[gpub011:0/16] 2024-02-07 07:26:03,828 (trainer:762) INFO: 33epoch:train:8101-8200batch: iter_time=8.547e-05, forward_time=0.385, loss_ctc=41.546, loss_att=39.438, acc=0.772, loss=40.071, backward_time=0.330, grad_norm=38.274, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.753e-04, train_time=1.552 +[gpub011:0/16] 2024-02-07 07:28:18,224 (trainer:762) INFO: 33epoch:train:8201-8300batch: iter_time=8.306e-05, forward_time=0.291, loss_ctc=48.806, loss_att=48.857, acc=0.735, loss=48.842, backward_time=0.298, grad_norm=40.487, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.753e-04, train_time=1.344 +[gpub011:0/16] 2024-02-07 07:30:49,607 (trainer:762) INFO: 33epoch:train:8301-8400batch: iter_time=8.643e-05, forward_time=0.286, loss_ctc=40.815, loss_att=39.223, acc=0.750, loss=39.701, backward_time=0.292, grad_norm=39.423, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.753e-04, train_time=1.514 +[gpub011:0/16] 2024-02-07 07:33:32,173 (trainer:762) INFO: 33epoch:train:8401-8500batch: 
iter_time=8.310e-04, forward_time=0.421, loss_ctc=48.942, loss_att=45.378, acc=0.756, loss=46.447, backward_time=0.324, grad_norm=48.492, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.112, optim0_lr0=1.753e-04, train_time=1.625 +[gpub011:0/16] 2024-02-07 07:35:44,059 (trainer:762) INFO: 33epoch:train:8501-8600batch: iter_time=7.970e-05, forward_time=0.289, loss_ctc=45.549, loss_att=51.392, acc=0.732, loss=49.639, backward_time=0.295, grad_norm=39.873, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.752e-04, train_time=1.319 +[gpub011:0/16] 2024-02-07 07:38:19,054 (trainer:762) INFO: 33epoch:train:8601-8700batch: iter_time=7.842e-05, forward_time=0.293, loss_ctc=42.124, loss_att=41.721, acc=0.760, loss=41.842, backward_time=0.297, grad_norm=37.067, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.752e-04, train_time=1.550 +[gpub011:0/16] 2024-02-07 07:39:37,235 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub011:0/16] 2024-02-07 07:39:56,504 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 07:40:00,116 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 07:40:00,116 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub011:0/16] 2024-02-07 07:40:00,119 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 07:48:30,754 
(trainer:762) INFO: 33epoch:train:8701-8800batch: iter_time=4.685, forward_time=0.530, loss_ctc=46.953, loss_att=47.738, acc=0.768, loss=47.502, backward_time=0.327, grad_norm=38.035, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.752e-04, train_time=6.117 +[gpub011:0/16] 2024-02-07 07:50:49,779 (trainer:762) INFO: 33epoch:train:8801-8900batch: iter_time=8.345e-05, forward_time=0.288, loss_ctc=39.753, loss_att=40.715, acc=0.779, loss=40.426, backward_time=0.294, grad_norm=43.601, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.752e-04, train_time=1.390 +[gpub011:0/16] 2024-02-07 07:53:10,028 (trainer:762) INFO: 33epoch:train:8901-9000batch: iter_time=8.159e-05, forward_time=0.292, loss_ctc=43.538, loss_att=44.906, acc=0.776, loss=44.496, backward_time=0.302, grad_norm=34.509, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.752e-04, train_time=1.403 +[gpub011:0/16] 2024-02-07 07:55:50,246 (trainer:762) INFO: 33epoch:train:9001-9100batch: iter_time=8.429e-05, forward_time=0.363, loss_ctc=41.879, loss_att=43.353, acc=0.753, loss=42.911, backward_time=0.429, grad_norm=37.862, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.109, optim0_lr0=1.752e-04, train_time=1.602 +[gpub011:0/16] 2024-02-07 07:58:31,429 (trainer:762) INFO: 33epoch:train:9101-9200batch: iter_time=8.211e-05, forward_time=0.289, loss_ctc=42.991, loss_att=43.051, acc=0.767, loss=43.033, backward_time=0.295, grad_norm=40.455, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.751e-04, train_time=1.612 +[gpub011:0/16] 2024-02-07 08:00:48,528 (trainer:762) INFO: 33epoch:train:9201-9300batch: iter_time=8.090e-05, forward_time=0.292, loss_ctc=50.204, loss_att=44.498, acc=0.764, loss=46.210, backward_time=0.300, grad_norm=41.608, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.751e-04, train_time=1.371 +[gpub011:0/16] 2024-02-07 08:03:30,149 (trainer:762) INFO: 
33epoch:train:9301-9400batch: iter_time=8.456e-05, forward_time=0.289, loss_ctc=44.574, loss_att=42.724, acc=0.755, loss=43.279, backward_time=0.294, grad_norm=40.990, clip=100.000, loss_scale=4.439e+33, optim_step_time=0.094, optim0_lr0=1.751e-04, train_time=1.616 +[gpub011:0/16] 2024-02-07 08:06:21,284 (trainer:762) INFO: 33epoch:train:9401-9500batch: iter_time=8.335e-05, forward_time=0.485, loss_ctc=44.331, loss_att=46.982, acc=0.751, loss=46.187, backward_time=0.339, grad_norm=37.142, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.751e-04, train_time=1.711 +[gpub011:0/16] 2024-02-07 08:09:04,580 (trainer:762) INFO: 33epoch:train:9501-9600batch: iter_time=8.143e-05, forward_time=0.291, loss_ctc=43.619, loss_att=39.471, acc=0.766, loss=40.715, backward_time=0.294, grad_norm=37.294, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.751e-04, train_time=1.632 +[gpub011:0/16] 2024-02-07 08:11:06,317 (trainer:762) INFO: 33epoch:train:9601-9700batch: iter_time=7.939e-05, forward_time=0.289, loss_ctc=48.512, loss_att=48.235, acc=0.742, loss=48.318, backward_time=0.299, grad_norm=47.941, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.750e-04, train_time=1.217 +[gpub011:0/16] 2024-02-07 08:13:22,881 (trainer:762) INFO: 33epoch:train:9701-9800batch: iter_time=8.674e-05, forward_time=0.290, loss_ctc=43.085, loss_att=43.779, acc=0.769, loss=43.571, backward_time=0.295, grad_norm=37.131, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.750e-04, train_time=1.366 +[gpub011:0/16] 2024-02-07 08:16:43,072 (trainer:762) INFO: 33epoch:train:9801-9900batch: iter_time=8.200e-05, forward_time=0.472, loss_ctc=46.926, loss_att=53.512, acc=0.747, loss=51.536, backward_time=0.328, grad_norm=38.074, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.750e-04, train_time=2.001 +[gpub011:0/16] 2024-02-07 08:19:03,655 (trainer:762) INFO: 33epoch:train:9901-10000batch: 
iter_time=7.804e-05, forward_time=0.288, loss_ctc=40.950, loss_att=39.538, acc=0.768, loss=39.962, backward_time=0.295, grad_norm=36.370, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.750e-04, train_time=1.406 +[gpub011:0/16] 2024-02-07 08:19:23,683 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub011:0/16] 2024-02-07 08:19:43,465 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 08:19:47,035 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 08:19:47,036 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub011:0/16] 2024-02-07 08:19:47,039 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 08:27:47,697 (trainer:762) INFO: 33epoch:train:10001-10100batch: iter_time=3.758, forward_time=0.289, loss_ctc=48.504, loss_att=49.867, acc=0.751, loss=49.458, backward_time=0.296, grad_norm=53.130, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.750e-04, train_time=5.240 +[gpub011:0/16] 2024-02-07 08:30:37,350 (trainer:762) INFO: 33epoch:train:10101-10200batch: iter_time=8.003e-05, forward_time=0.431, loss_ctc=42.034, loss_att=43.325, acc=0.781, loss=42.938, backward_time=0.330, grad_norm=34.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.109, optim0_lr0=1.750e-04, train_time=1.696 +[gpub011:0/16] 2024-02-07 08:32:54,052 
(trainer:762) INFO: 33epoch:train:10201-10300batch: iter_time=8.091e-05, forward_time=0.288, loss_ctc=37.415, loss_att=35.957, acc=0.767, loss=36.395, backward_time=0.293, grad_norm=31.521, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.749e-04, train_time=1.367 +[gpub011:0/16] 2024-02-07 08:35:00,497 (trainer:762) INFO: 33epoch:train:10301-10400batch: iter_time=8.636e-05, forward_time=0.289, loss_ctc=44.085, loss_att=41.805, acc=0.769, loss=42.489, backward_time=0.295, grad_norm=37.628, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.749e-04, train_time=1.264 +[gpub011:0/16] 2024-02-07 08:37:57,088 (trainer:762) INFO: 33epoch:train:10401-10500batch: iter_time=8.272e-05, forward_time=0.458, loss_ctc=42.990, loss_att=38.873, acc=0.758, loss=40.108, backward_time=0.327, grad_norm=41.528, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.749e-04, train_time=1.765 +[gpub011:0/16] 2024-02-07 08:40:04,200 (trainer:762) INFO: 33epoch:train:10501-10600batch: iter_time=7.862e-05, forward_time=0.298, loss_ctc=49.698, loss_att=48.423, acc=0.743, loss=48.806, backward_time=0.300, grad_norm=43.109, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.749e-04, train_time=1.271 +[gpub011:0/16] 2024-02-07 08:42:37,302 (trainer:762) INFO: 33epoch:train:10601-10700batch: iter_time=8.075e-05, forward_time=0.289, loss_ctc=41.410, loss_att=39.544, acc=0.772, loss=40.104, backward_time=0.293, grad_norm=34.624, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.749e-04, train_time=1.531 +[gpub011:0/16] 2024-02-07 08:44:45,153 (trainer:762) INFO: 33epoch:train:10701-10800batch: iter_time=8.718e-05, forward_time=0.291, loss_ctc=48.792, loss_att=49.385, acc=0.736, loss=49.207, backward_time=0.306, grad_norm=40.669, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.748e-04, train_time=1.278 +[gpub011:0/16] 2024-02-07 08:47:26,813 (trainer:762) INFO: 
33epoch:train:10801-10900batch: iter_time=8.857e-05, forward_time=0.396, loss_ctc=40.595, loss_att=39.125, acc=0.751, loss=39.566, backward_time=0.309, grad_norm=40.555, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.748e-04, train_time=1.616 +[gpub011:0/16] 2024-02-07 08:49:31,261 (trainer:762) INFO: 33epoch:train:10901-11000batch: iter_time=8.270e-05, forward_time=0.290, loss_ctc=47.264, loss_att=44.616, acc=0.760, loss=45.410, backward_time=0.295, grad_norm=45.748, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.748e-04, train_time=1.244 +[gpub011:0/16] 2024-02-07 08:51:45,406 (trainer:762) INFO: 33epoch:train:11001-11100batch: iter_time=7.848e-05, forward_time=0.291, loss_ctc=46.199, loss_att=51.759, acc=0.730, loss=50.091, backward_time=0.296, grad_norm=41.155, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.748e-04, train_time=1.342 +[gpub011:0/16] 2024-02-07 08:54:29,764 (trainer:762) INFO: 33epoch:train:11101-11200batch: iter_time=2.147e-04, forward_time=0.430, loss_ctc=41.785, loss_att=42.580, acc=0.757, loss=42.341, backward_time=0.335, grad_norm=36.322, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.748e-04, train_time=1.642 +[gpub011:0/16] 2024-02-07 08:55:44,536 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub011:0/16] 2024-02-07 08:56:04,322 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 08:56:07,923 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 08:56:07,923 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-07 08:56:07,927 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 09:05:43,551 (trainer:762) INFO: 33epoch:train:11201-11300batch: iter_time=5.443, forward_time=0.291, loss_ctc=47.064, loss_att=46.170, acc=0.768, loss=46.438, backward_time=0.298, grad_norm=37.224, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.748e-04, train_time=6.738 +[gpub011:0/16] 2024-02-07 09:10:44,775 (trainer:762) INFO: 33epoch:train:11301-11400batch: iter_time=8.226e-05, forward_time=0.298, loss_ctc=39.728, loss_att=39.949, acc=0.775, loss=39.883, backward_time=0.296, grad_norm=42.542, clip=100.000, loss_scale=8.879e+33, optim_step_time=0.097, optim0_lr0=1.747e-04, train_time=3.012 +[gpub011:0/16] 2024-02-07 09:13:39,536 (trainer:762) INFO: 33epoch:train:11401-11500batch: iter_time=8.233e-05, forward_time=0.425, loss_ctc=42.951, loss_att=44.258, acc=0.774, loss=43.866, backward_time=0.315, grad_norm=34.318, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.747e-04, train_time=1.747 +[gpub011:0/16] 2024-02-07 09:17:31,970 (trainer:762) 
INFO: 33epoch:train:11501-11600batch: iter_time=7.811e-05, forward_time=0.292, loss_ctc=41.725, loss_att=40.260, acc=0.759, loss=40.699, backward_time=0.296, grad_norm=36.064, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.747e-04, train_time=2.323 +[gpub011:0/16] 2024-02-07 09:19:52,762 (trainer:762) INFO: 33epoch:train:11601-11700batch: iter_time=8.512e-05, forward_time=0.287, loss_ctc=43.116, loss_att=43.050, acc=0.753, loss=43.070, backward_time=0.294, grad_norm=40.152, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.747e-04, train_time=1.409 +[gpub011:0/16] 2024-02-07 09:22:51,761 (trainer:762) INFO: 33epoch:train:11701-11800batch: iter_time=8.350e-05, forward_time=0.391, loss_ctc=50.514, loss_att=45.222, acc=0.760, loss=46.809, backward_time=0.363, grad_norm=45.404, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.747e-04, train_time=1.790 +[gpub011:0/16] 2024-02-07 09:24:57,633 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub011:0/16] 2024-02-07 09:25:03,052 (trainer:762) INFO: 33epoch:train:11801-11900batch: iter_time=8.318e-05, forward_time=0.288, loss_ctc=44.713, loss_att=42.415, acc=0.750, loss=43.104, backward_time=0.294, grad_norm=40.692, clip=100.000, loss_scale=1.012e+34, optim_step_time=0.094, optim0_lr0=1.747e-04, train_time=1.313 +[gpub011:0/16] 2024-02-07 09:28:15,288 (trainer:762) INFO: 33epoch:train:11901-12000batch: iter_time=8.803e-05, forward_time=0.292, loss_ctc=44.331, loss_att=47.317, acc=0.743, loss=46.422, backward_time=0.293, grad_norm=38.311, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.746e-04, train_time=1.921 +[gpub011:0/16] 2024-02-07 09:30:38,878 (trainer:762) INFO: 33epoch:train:12001-12100batch: iter_time=8.892e-05, forward_time=0.288, loss_ctc=43.310, loss_att=39.290, acc=0.763, loss=40.496, backward_time=0.298, grad_norm=39.135, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.746e-04, train_time=1.437 +[gpub011:0/16] 2024-02-07 09:33:29,218 (trainer:762) INFO: 33epoch:train:12101-12200batch: iter_time=9.052e-05, forward_time=0.427, loss_ctc=48.273, loss_att=48.180, acc=0.733, loss=48.208, backward_time=0.352, grad_norm=49.055, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.746e-04, train_time=1.703 +[gpub011:0/16] 2024-02-07 09:36:22,020 (trainer:762) INFO: 33epoch:train:12201-12300batch: iter_time=8.708e-05, forward_time=0.289, loss_ctc=42.643, loss_att=41.525, acc=0.768, loss=41.860, backward_time=0.294, grad_norm=37.967, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.746e-04, train_time=1.728 +[gpub011:0/16] 2024-02-07 09:38:38,632 (trainer:762) INFO: 33epoch:train:12301-12400batch: iter_time=2.029e-04, forward_time=0.301, loss_ctc=46.555, loss_att=51.619, acc=0.741, loss=50.100, backward_time=0.309, grad_norm=38.633, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.746e-04, train_time=1.365 +[gpub011:0/16] 2024-02-07 
09:41:44,685 (trainer:762) INFO: 33epoch:train:12401-12500batch: iter_time=9.317e-05, forward_time=0.374, loss_ctc=41.259, loss_att=38.547, acc=0.769, loss=39.361, backward_time=0.378, grad_norm=35.519, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.745e-04, train_time=1.861 +[gpub011:0/16] 2024-02-07 09:42:04,713 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub011:0/16] 2024-02-07 09:42:24,413 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 09:42:27,977 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 09:42:27,977 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub011:0/16] 2024-02-07 09:42:27,981 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 09:50:29,714 (trainer:762) INFO: 33epoch:train:12501-12600batch: iter_time=3.866, forward_time=0.293, loss_ctc=49.518, loss_att=51.049, acc=0.758, loss=50.590, backward_time=0.296, grad_norm=52.471, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.745e-04, train_time=5.250 +[gpub011:0/16] 2024-02-07 09:52:50,389 (trainer:762) INFO: 33epoch:train:12601-12700batch: iter_time=8.983e-05, forward_time=0.289, loss_ctc=41.633, loss_att=43.650, acc=0.781, loss=43.045, backward_time=0.295, grad_norm=36.159, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, 
optim0_lr0=1.745e-04, train_time=1.406 +[gpub011:0/16] 2024-02-07 09:55:41,668 (trainer:762) INFO: 33epoch:train:12701-12800batch: iter_time=7.836e-05, forward_time=0.431, loss_ctc=37.203, loss_att=38.013, acc=0.766, loss=37.770, backward_time=0.321, grad_norm=33.306, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.745e-04, train_time=1.713 +[gpub011:0/16] 2024-02-07 09:58:21,394 (trainer:762) INFO: 33epoch:train:12801-12900batch: iter_time=8.087e-05, forward_time=0.293, loss_ctc=44.368, loss_att=44.152, acc=0.769, loss=44.217, backward_time=0.296, grad_norm=38.324, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.745e-04, train_time=1.597 +[gpub011:0/16] 2024-02-07 10:00:34,379 (trainer:762) INFO: 33epoch:train:12901-13000batch: iter_time=8.749e-05, forward_time=0.289, loss_ctc=42.944, loss_att=38.859, acc=0.769, loss=40.084, backward_time=0.296, grad_norm=39.803, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.745e-04, train_time=1.330 +[gpub011:0/16] 2024-02-07 10:03:27,146 (trainer:762) INFO: 33epoch:train:13001-13100batch: iter_time=8.305e-05, forward_time=0.376, loss_ctc=49.182, loss_att=48.845, acc=0.746, loss=48.946, backward_time=0.397, grad_norm=43.816, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.744e-04, train_time=1.727 +[gpub011:0/16] 2024-02-07 10:05:16,349 (trainer:762) INFO: 33epoch:train:13101-13200batch: iter_time=8.129e-05, forward_time=0.290, loss_ctc=41.822, loss_att=39.600, acc=0.775, loss=40.267, backward_time=0.296, grad_norm=37.990, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.744e-04, train_time=1.092 +[gpub011:0/16] 2024-02-07 10:08:22,060 (trainer:762) INFO: 33epoch:train:13201-13300batch: iter_time=7.988e-05, forward_time=0.290, loss_ctc=48.773, loss_att=48.814, acc=0.747, loss=48.802, backward_time=0.296, grad_norm=40.377, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.744e-04, 
train_time=1.857 +[gpub011:0/16] 2024-02-07 10:10:58,758 (trainer:762) INFO: 33epoch:train:13301-13400batch: iter_time=2.660e-04, forward_time=0.387, loss_ctc=40.498, loss_att=37.991, acc=0.765, loss=38.743, backward_time=0.362, grad_norm=38.417, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.744e-04, train_time=1.567 +[gpub011:0/16] 2024-02-07 10:13:10,604 (trainer:762) INFO: 33epoch:train:13401-13500batch: iter_time=8.092e-05, forward_time=0.290, loss_ctc=47.492, loss_att=44.658, acc=0.767, loss=45.508, backward_time=0.296, grad_norm=45.047, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.744e-04, train_time=1.318 +[gpub011:0/16] 2024-02-07 10:15:48,384 (trainer:762) INFO: 33epoch:train:13501-13600batch: iter_time=8.257e-05, forward_time=0.290, loss_ctc=45.449, loss_att=52.856, acc=0.740, loss=50.634, backward_time=0.295, grad_norm=39.335, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.744e-04, train_time=1.578 +[gpub011:0/16] 2024-02-07 10:18:41,089 (trainer:762) INFO: 33epoch:train:13601-13700batch: iter_time=1.740e-04, forward_time=0.399, loss_ctc=42.001, loss_att=43.294, acc=0.763, loss=42.906, backward_time=0.339, grad_norm=36.350, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.743e-04, train_time=1.727 +[gpub011:0/16] 2024-02-07 10:19:56,732 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub011:0/16] 2024-02-07 10:20:16,178 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 10:20:20,031 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 10:20:20,031 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-07 10:20:20,034 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 10:27:13,479 (trainer:762) INFO: 33epoch:train:13701-13800batch: iter_time=3.814, forward_time=0.294, loss_ctc=46.746, loss_att=47.063, acc=0.772, loss=46.968, backward_time=0.298, grad_norm=37.740, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.743e-04, train_time=5.124 +[gpub011:0/16] 2024-02-07 10:29:50,101 (trainer:762) INFO: 33epoch:train:13801-13900batch: iter_time=7.953e-05, forward_time=0.296, loss_ctc=39.826, loss_att=39.435, acc=0.783, loss=39.552, backward_time=0.316, grad_norm=43.817, clip=100.000, loss_scale=5.452e+33, optim_step_time=0.096, optim0_lr0=1.743e-04, train_time=1.566 +[gpub011:0/16] 2024-02-07 10:32:40,039 (trainer:762) INFO: 33epoch:train:13901-14000batch: iter_time=3.159e-04, forward_time=0.406, loss_ctc=42.826, loss_att=44.540, acc=0.778, loss=44.026, backward_time=0.315, grad_norm=34.650, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.111, optim0_lr0=1.743e-04, train_time=1.699 +[gpub011:0/16] 2024-02-07 10:34:58,953 (trainer:762) 
INFO: 33epoch:train:14001-14100batch: iter_time=7.692e-05, forward_time=0.292, loss_ctc=41.726, loss_att=43.652, acc=0.753, loss=43.074, backward_time=0.296, grad_norm=36.791, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.743e-04, train_time=1.389 +[gpub011:0/16] 2024-02-07 10:38:03,323 (trainer:762) INFO: 33epoch:train:14101-14200batch: iter_time=1.910e-04, forward_time=0.371, loss_ctc=42.556, loss_att=42.281, acc=0.769, loss=42.363, backward_time=0.381, grad_norm=37.517, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=1.742e-04, train_time=1.844 +[gpub011:0/16] 2024-02-07 10:40:43,876 (trainer:762) INFO: 33epoch:train:14201-14300batch: iter_time=8.023e-05, forward_time=0.288, loss_ctc=49.766, loss_att=44.434, acc=0.765, loss=46.034, backward_time=0.297, grad_norm=41.561, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.742e-04, train_time=1.605 +[gpub011:0/16] 2024-02-07 10:43:52,350 (trainer:762) INFO: 33epoch:train:14301-14400batch: iter_time=8.259e-05, forward_time=0.287, loss_ctc=44.837, loss_att=42.828, acc=0.755, loss=43.431, backward_time=0.293, grad_norm=40.900, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.742e-04, train_time=1.884 +[gpub011:0/16] 2024-02-07 10:46:23,168 (trainer:762) INFO: 33epoch:train:14401-14500batch: iter_time=1.869e-04, forward_time=0.388, loss_ctc=44.548, loss_att=47.518, acc=0.750, loss=46.627, backward_time=0.355, grad_norm=37.903, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.101, optim0_lr0=1.742e-04, train_time=1.508 +[gpub011:0/16] 2024-02-07 10:49:01,505 (trainer:762) INFO: 33epoch:train:14501-14600batch: iter_time=8.323e-05, forward_time=0.288, loss_ctc=42.598, loss_att=38.393, acc=0.772, loss=39.655, backward_time=0.293, grad_norm=35.734, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.742e-04, train_time=1.583 +[gpub011:0/16] 2024-02-07 10:51:39,905 (trainer:762) INFO: 
33epoch:train:14601-14700batch: iter_time=8.161e-05, forward_time=0.289, loss_ctc=48.074, loss_att=47.355, acc=0.745, loss=47.571, backward_time=0.295, grad_norm=48.705, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.742e-04, train_time=1.584 +[gpub011:0/16] 2024-02-07 10:54:43,867 (trainer:762) INFO: 33epoch:train:14701-14800batch: iter_time=3.993e-04, forward_time=0.466, loss_ctc=42.380, loss_att=43.290, acc=0.773, loss=43.017, backward_time=0.320, grad_norm=35.832, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.102, optim0_lr0=1.741e-04, train_time=1.839 +[gpub011:0/16] 2024-02-07 10:57:29,434 (trainer:762) INFO: 33epoch:train:14801-14900batch: iter_time=9.116e-05, forward_time=0.293, loss_ctc=46.716, loss_att=52.743, acc=0.752, loss=50.935, backward_time=0.295, grad_norm=37.630, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.741e-04, train_time=1.655 +[gpub011:0/16] 2024-02-07 10:59:49,311 (trainer:762) INFO: 33epoch:train:14901-15000batch: iter_time=9.363e-05, forward_time=0.289, loss_ctc=41.152, loss_att=39.614, acc=0.768, loss=40.075, backward_time=0.294, grad_norm=35.578, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.741e-04, train_time=1.398 +[gpub011:0/16] 2024-02-07 11:40:37,449 (trainer:361) INFO: 33epoch results: [train] iter_time=0.319, forward_time=0.328, loss_ctc=44.786, loss_att=44.310, acc=0.758, loss=44.453, backward_time=0.312, grad_norm=40.231, clip=100.000, loss_scale=5.643e+33, optim_step_time=0.098, optim0_lr0=1.754e-04, train_time=1.822, time=7 hours, 35 minutes and 51.39 seconds, total_count=525000, gpu_max_cached_mem_GB=41.025, [valid] loss_ctc=35.538, cer_ctc=0.182, loss_att=36.677, acc=0.696, cer=0.301, wer=0.991, loss=36.335, time=40 minutes and 23.58 seconds, total_count=163485, gpu_max_cached_mem_GB=41.025 +[gpub011:0/16] 2024-02-07 11:40:47,889 (trainer:416) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub011:0/16] 2024-02-07 
11:40:47,932 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/28epoch.pth +[gpub011:0/16] 2024-02-07 11:40:47,932 (trainer:290) INFO: 34/45epoch started. Estimated time to finish: 4 days, 34 minutes and 25.43 seconds +[gpub011:0/16] 2024-02-07 11:40:47,941 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub011:0/16] 2024-02-07 11:41:06,904 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 11:41:10,406 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 11:41:10,406 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub011:0/16] 2024-02-07 11:41:10,410 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 11:49:33,549 (trainer:762) INFO: 34epoch:train:1-100batch: iter_time=4.083, forward_time=0.324, loss_ctc=57.803, loss_att=56.908, acc=0.726, loss=57.177, backward_time=0.308, grad_norm=51.142, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, optim0_lr0=1.741e-04, train_time=5.256 +[gpub011:0/16] 2024-02-07 11:51:44,168 (trainer:762) INFO: 34epoch:train:101-200batch: iter_time=2.070e-04, forward_time=0.375, loss_ctc=51.589, loss_att=45.672, acc=0.748, loss=47.447, backward_time=0.298, grad_norm=41.423, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.107, 
optim0_lr0=1.741e-04, train_time=1.306 +[gpub011:0/16] 2024-02-07 11:53:50,337 (trainer:762) INFO: 34epoch:train:201-300batch: iter_time=8.738e-05, forward_time=0.297, loss_ctc=39.117, loss_att=35.778, acc=0.763, loss=36.780, backward_time=0.306, grad_norm=35.847, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.741e-04, train_time=1.261 +[gpub011:0/16] 2024-02-07 11:56:14,867 (trainer:762) INFO: 34epoch:train:301-400batch: iter_time=8.591e-05, forward_time=0.325, loss_ctc=49.054, loss_att=51.009, acc=0.742, loss=50.422, backward_time=0.318, grad_norm=44.612, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.740e-04, train_time=1.444 +[gpub011:0/16] 2024-02-07 11:58:17,990 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub011:0/16] 2024-02-07 11:58:21,232 (trainer:762) INFO: 34epoch:train:401-500batch: iter_time=8.245e-05, forward_time=0.306, loss_ctc=46.921, loss_att=43.434, acc=0.746, loss=44.480, backward_time=0.294, grad_norm=41.854, clip=100.000, loss_scale=1.023e+34, optim_step_time=0.095, optim0_lr0=1.740e-04, train_time=1.264 +[gpub011:0/16] 2024-02-07 12:00:31,160 (trainer:762) INFO: 34epoch:train:501-600batch: iter_time=2.320e-04, forward_time=0.340, loss_ctc=46.608, loss_att=47.161, acc=0.753, loss=46.995, backward_time=0.326, grad_norm=44.347, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.740e-04, train_time=1.299 +[gpub011:0/16] 2024-02-07 12:02:46,530 (trainer:762) INFO: 34epoch:train:601-700batch: iter_time=9.218e-05, forward_time=0.335, loss_ctc=45.118, loss_att=46.794, acc=0.763, loss=46.291, backward_time=0.326, grad_norm=37.205, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.740e-04, train_time=1.353 +[gpub011:0/16] 2024-02-07 12:04:58,805 (trainer:762) INFO: 34epoch:train:701-800batch: iter_time=9.103e-05, forward_time=0.307, loss_ctc=37.397, loss_att=35.741, acc=0.783, loss=36.238, backward_time=0.297, grad_norm=33.088, 
clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.740e-04, train_time=1.323 +[gpub011:0/16] 2024-02-07 12:07:19,098 (trainer:762) INFO: 34epoch:train:801-900batch: iter_time=5.311e-04, forward_time=0.404, loss_ctc=47.204, loss_att=42.097, acc=0.756, loss=43.629, backward_time=0.335, grad_norm=44.033, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.739e-04, train_time=1.403 +[gpub011:0/16] 2024-02-07 12:09:27,701 (trainer:762) INFO: 34epoch:train:901-1000batch: iter_time=8.877e-05, forward_time=0.320, loss_ctc=52.580, loss_att=51.226, acc=0.747, loss=51.632, backward_time=0.306, grad_norm=45.479, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.739e-04, train_time=1.285 +[gpub011:0/16] 2024-02-07 12:11:54,628 (trainer:762) INFO: 34epoch:train:1001-1100batch: iter_time=4.150e-04, forward_time=0.384, loss_ctc=47.013, loss_att=40.146, acc=0.764, loss=42.206, backward_time=0.298, grad_norm=44.114, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.739e-04, train_time=1.469 +[gpub011:0/16] 2024-02-07 12:14:04,646 (trainer:762) INFO: 34epoch:train:1101-1200batch: iter_time=8.979e-05, forward_time=0.295, loss_ctc=46.837, loss_att=40.938, acc=0.749, loss=42.708, backward_time=0.304, grad_norm=41.385, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.739e-04, train_time=1.301 +[gpub011:0/16] 2024-02-07 12:15:32,357 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub011:0/16] 2024-02-07 12:15:51,505 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 12:15:55,104 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 12:15:55,104 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub011:0/16] 2024-02-07 12:15:55,133 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub011:0/16] 2024-02-07 12:22:58,478 (trainer:762) INFO: 34epoch:train:1201-1300batch: iter_time=3.944, forward_time=0.336, loss_ctc=48.432, loss_att=49.120, acc=0.748, loss=48.913, backward_time=0.324, grad_norm=42.455, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.739e-04, train_time=5.338 +[gpub011:0/16] 2024-02-07 12:25:11,841 (trainer:762) INFO: 34epoch:train:1301-1400batch: iter_time=2.427e-04, forward_time=0.349, loss_ctc=52.650, loss_att=50.247, acc=0.758, loss=50.968, backward_time=0.333, grad_norm=45.551, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.739e-04, train_time=1.334 +[gpub011:0/16] 2024-02-07 12:27:37,843 (trainer:762) INFO: 34epoch:train:1401-1500batch: iter_time=7.695e-05, forward_time=0.295, loss_ctc=43.650, loss_att=39.755, acc=0.758, loss=40.924, backward_time=0.309, grad_norm=35.772, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.738e-04, train_time=1.459 +[gpub011:0/16] 2024-02-07 12:30:07,240 (trainer:762) INFO: 
34epoch:train:1501-1600batch: iter_time=2.417e-04, forward_time=0.330, loss_ctc=42.673, loss_att=38.455, acc=0.768, loss=39.721, backward_time=0.314, grad_norm=38.815, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.738e-04, train_time=1.495 +[gpub011:0/16] 2024-02-07 12:32:27,151 (trainer:762) INFO: 34epoch:train:1601-1700batch: iter_time=7.926e-05, forward_time=0.338, loss_ctc=49.872, loss_att=51.821, acc=0.753, loss=51.236, backward_time=0.304, grad_norm=40.288, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.738e-04, train_time=1.399 +[gpub011:0/16] 2024-02-07 12:34:52,240 (trainer:762) INFO: 34epoch:train:1701-1800batch: iter_time=8.687e-04, forward_time=0.357, loss_ctc=46.736, loss_att=49.068, acc=0.738, loss=48.368, backward_time=0.304, grad_norm=41.664, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.738e-04, train_time=1.451 +[gpub011:0/16] 2024-02-07 12:37:32,852 (trainer:762) INFO: 34epoch:train:1801-1900batch: iter_time=7.864e-05, forward_time=0.380, loss_ctc=43.029, loss_att=43.211, acc=0.788, loss=43.156, backward_time=0.338, grad_norm=36.760, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.738e-04, train_time=1.604 +[gpub011:0/16] 2024-02-07 12:39:39,293 (trainer:762) INFO: 34epoch:train:1901-2000batch: iter_time=8.192e-05, forward_time=0.303, loss_ctc=41.553, loss_att=38.434, acc=0.790, loss=39.370, backward_time=0.295, grad_norm=32.905, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.738e-04, train_time=1.266 +[gpub011:0/16] 2024-02-07 12:42:21,982 (trainer:762) INFO: 34epoch:train:2001-2100batch: iter_time=1.493e-04, forward_time=0.346, loss_ctc=42.009, loss_att=40.585, acc=0.767, loss=41.013, backward_time=0.320, grad_norm=39.190, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.737e-04, train_time=1.626 +[gpub011:0/16] 2024-02-07 12:44:39,212 (trainer:762) INFO: 34epoch:train:2101-2200batch: 
iter_time=8.302e-05, forward_time=0.330, loss_ctc=48.605, loss_att=49.716, acc=0.752, loss=49.383, backward_time=0.351, grad_norm=39.157, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.737e-04, train_time=1.372 +[gpub011:0/16] 2024-02-07 12:47:08,228 (trainer:762) INFO: 34epoch:train:2201-2300batch: iter_time=7.907e-05, forward_time=0.302, loss_ctc=49.412, loss_att=46.259, acc=0.759, loss=47.205, backward_time=0.301, grad_norm=45.881, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.737e-04, train_time=1.487 +[gpub011:0/16] 2024-02-07 12:49:51,300 (trainer:762) INFO: 34epoch:train:2301-2400batch: iter_time=2.586e-04, forward_time=0.349, loss_ctc=43.064, loss_att=37.859, acc=0.775, loss=39.420, backward_time=0.317, grad_norm=36.658, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.737e-04, train_time=1.633 +[gpub011:0/16] 2024-02-07 12:52:00,280 (trainer:762) INFO: 34epoch:train:2401-2500batch: iter_time=7.787e-05, forward_time=0.323, loss_ctc=44.749, loss_att=40.300, acc=0.767, loss=41.635, backward_time=0.314, grad_norm=37.986, clip=100.000, loss_scale=5.348e+33, optim_step_time=0.095, optim0_lr0=1.737e-04, train_time=1.289 +[gpub011:0/16] 2024-02-07 12:52:20,337 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub011:0/16] 2024-02-07 12:52:39,597 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub011:0/16] 2024-02-07 12:52:43,217 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub011:0/16] 2024-02-07 12:52:43,217 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub011:0/16] 2024-02-07 12:52:43,220 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.7.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.7.log new file mode 100644 index 0000000000000000000000000000000000000000..45027d5beec42fd68ea96d47b14ab744be740466 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.7.log @@ -0,0 +1,1729 @@ +# Running on gpub058.delta.ncsa.illinois.edu +# Started at Sun Feb 4 17:38:07 CST 2024 +# SLURMD_NODENAME=gpub058 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2938841 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1707262664 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2938841 +# 
SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[058-061]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1707089864 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[058-061]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=2127211 +# SLURM_TOPOLOGY_ADDR=ss00.ss11.gpub058 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_f3531bf8-892e-4ab6-b64a-bfb8246edf7b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf 
stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_f3531bf8-892e-4ab6-b64a-bfb8246edf7b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_f3531bf8-892e-4ab6-b64a-bfb8246edf7b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 
/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_f3531bf8-892e-4ab6-b64a-bfb8246edf7b +_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_f3531bf8-892e-4ab6-b64a-bfb8246edf7b +[gpub058:0/16] 2024-02-04 17:40:58,608 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub058:0/16] 2024-02-04 17:41:00,622 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. +[gpub058:0/16] 2024-02-04 17:41:00,693 (s2t:464) INFO: Vocabulary size: 50002 +[gpub058:0/16] 2024-02-04 17:41:09,108 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub058:0/16] 2024-02-04 17:41:09,114 (abs_task:1232) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): 
Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, 
elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + 
(dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + 
(depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): 
Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + 
(merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, 
bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + 
(linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): 
Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub058:0/16] 2024-02-04 17:41:09,114 (abs_task:1235) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub058:0/16] 2024-02-04 17:41:09,114 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub058:0/16] 2024-02-04 17:41:09,117 (abs_task:1245) INFO: Saving the configuration in exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub058:0/16] 2024-02-04 17:41:14,453 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 17:41:15,372 (abs_task:1616) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 17:41:15,372 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub058:0/16] 2024-02-04 17:41:15,373 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 17:41:29,051 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub058:2127280:2127280 [0] NCCL INFO 
Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2127280:2127280 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:2127280:2127280 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub058:0/16] 2024-02-04 17:41:34,492 (trainer:287) INFO: 27/45epoch started +[gpub058:0/16] 2024-02-04 17:41:34,551 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub058:0/16] 2024-02-04 17:41:52,523 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 17:41:55,961 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 17:41:55,961 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-04 17:41:55,964 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub059:2626468:2626468 [2] NCCL INFO cudaDriverVersion 12020 +gpub059:2626468:2626468 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:2626468:2626468 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:2626468:2626532 [2] NCCL INFO NET/IB : No device found. 
+gpub059:2626468:2626532 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.159<0> [1]hsn0:141.142.145.159<0> +gpub059:2626468:2626532 [2] NCCL INFO Using network Socket +gpub059:2626468:2626532 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub059:2626468:2626532 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub059:2626468:2626532 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub059:2626468:2626532 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub059:2626468:2626532 [2] NCCL INFO Connected all rings +gpub059:2626468:2626532 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub059:2626468:2626532 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub059:2626468:2626532 [2] NCCL INFO Connected all trees +gpub059:2626468:2626532 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub059:2626468:2626532 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:2626468:2626532 [2] NCCL INFO comm 0x1609ff70 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub059:2626467:2626467 [1] NCCL INFO cudaDriverVersion 12020 +gpub059:2626467:2626467 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:2626467:2626467 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:2626467:2626533 [1] NCCL INFO NET/IB : No device found. 
+gpub059:2626467:2626533 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.159<0> [1]hsn0:141.142.145.159<0> +gpub059:2626467:2626533 [1] NCCL INFO Using network Socket +gpub059:2626467:2626533 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub059:2626467:2626533 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub059:2626467:2626533 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub059:2626467:2626533 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub059:2626467:2626533 [1] NCCL INFO Connected all rings +gpub059:2626467:2626533 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/Socket/1 +gpub059:2626467:2626533 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/Socket/1 +gpub059:2626467:2626533 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub059:2626467:2626533 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub059:2626467:2626533 [1] NCCL INFO Connected all trees +gpub059:2626467:2626533 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub059:2626467:2626533 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:2626467:2626533 [1] NCCL INFO comm 0x17c88d70 rank 5 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub059:2626466:2626466 [0] NCCL INFO cudaDriverVersion 12020 +gpub059:2626466:2626466 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:2626466:2626466 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:2626466:2626534 [0] NCCL INFO NET/IB : No device found. 
+gpub059:2626466:2626534 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.159<0> [1]hsn0:141.142.145.159<0> +gpub059:2626466:2626534 [0] NCCL INFO Using network Socket +gpub059:2626466:2626534 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub059:2626466:2626534 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub059:2626466:2626534 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub059:2626466:2626534 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub059:2626466:2626534 [0] NCCL INFO Connected all rings +gpub059:2626466:2626534 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/Socket/1 +gpub059:2626466:2626534 [0] NCCL INFO Connected all trees +gpub059:2626466:2626534 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub059:2626466:2626534 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:2626466:2626534 [0] NCCL INFO comm 0x16200a70 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub059:2626469:2626469 [3] NCCL INFO cudaDriverVersion 12020 +gpub059:2626469:2626469 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:2626469:2626469 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:2626469:2626535 
[3] NCCL INFO NET/IB : No device found. +gpub059:2626469:2626535 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.159<0> [1]hsn0:141.142.145.159<0> +gpub059:2626469:2626535 [3] NCCL INFO Using network Socket +gpub059:2626469:2626535 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub059:2626469:2626535 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub059:2626469:2626535 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub059:2626469:2626535 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub059:2626469:2626535 [3] NCCL INFO Connected all rings +gpub059:2626469:2626535 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub059:2626469:2626535 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub059:2626469:2626535 [3] NCCL INFO Connected all trees +gpub059:2626469:2626535 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub059:2626469:2626535 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:2626469:2626535 [3] NCCL INFO comm 0xfed8830 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:2127281:2127281 [1] NCCL INFO cudaDriverVersion 12020 +gpub058:2127281:2127281 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2127281:2127281 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:2127281:2127336 [1] NCCL INFO NET/IB : No device found. 
+gpub058:2127281:2127336 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2127281:2127336 [1] NCCL INFO Using network Socket +gpub058:2127281:2127336 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub058:2127281:2127336 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub058:2127281:2127336 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub058:2127281:2127336 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub058:2127281:2127336 [1] NCCL INFO Connected all rings +gpub058:2127281:2127336 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub058:2127281:2127336 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub058:2127281:2127336 [1] NCCL INFO Connected all trees +gpub058:2127281:2127336 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2127281:2127336 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2127281:2127336 [1] NCCL INFO comm 0xc9e7a80 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub058:2127280:2127337 [0] NCCL INFO NET/IB : No device found. 
+gpub058:2127280:2127337 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2127280:2127337 [0] NCCL INFO Using network Socket +gpub058:2127280:2127337 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub058:2127280:2127337 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub058:2127280:2127337 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub058:2127280:2127337 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub058:2127280:2127337 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2127280:2127337 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2127280:2127337 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub058:2127280:2127337 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub058:2127280:2127337 [0] NCCL INFO Connected all rings +gpub058:2127280:2127337 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/Socket/1 +gpub058:2127280:2127337 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2127280:2127337 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [send] via NET/Socket/1 +gpub058:2127280:2127337 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub058:2127280:2127337 [0] NCCL INFO Connected all trees +gpub058:2127280:2127337 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2127280:2127337 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2127280:2127337 [0] NCCL INFO comm 0x75b2a1a0 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub058:2127282:2127282 [2] NCCL INFO cudaDriverVersion 12020 +gpub058:2127282:2127282 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2127282:2127282 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpub058:2127282:2127338 [2] NCCL INFO NET/IB : No device found. +gpub058:2127282:2127338 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2127282:2127338 [2] NCCL INFO Using network Socket +gpub058:2127282:2127338 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub058:2127282:2127338 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub058:2127282:2127338 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub058:2127282:2127338 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub058:2127282:2127338 [2] NCCL INFO Connected all rings +gpub058:2127282:2127338 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub058:2127282:2127338 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub058:2127282:2127338 [2] NCCL INFO Connected all trees +gpub058:2127282:2127338 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2127282:2127338 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2127282:2127338 [2] NCCL INFO comm 0xe0e31d0 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:2127283:2127283 [3] NCCL INFO cudaDriverVersion 12020 +gpub058:2127283:2127283 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:2127283:2127283 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:2127283:2127339 [3] NCCL INFO NET/IB : No device found. 
+gpub058:2127283:2127339 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:2127283:2127339 [3] NCCL INFO Using network Socket +gpub058:2127283:2127339 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub058:2127283:2127339 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub058:2127283:2127339 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub058:2127283:2127339 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub058:2127283:2127339 [3] NCCL INFO Connected all rings +gpub058:2127283:2127339 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub058:2127283:2127339 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub058:2127283:2127339 [3] NCCL INFO Connected all trees +gpub058:2127283:2127339 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub058:2127283:2127339 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:2127283:2127339 [3] NCCL INFO comm 0x1cb25c80 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub061:2529191:2529191 [3] NCCL INFO cudaDriverVersion 12020 +gpub061:2529191:2529191 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2529191:2529191 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:2529191:2529247 [3] NCCL INFO NET/IB : No device found. 
+gpub061:2529191:2529247 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2529191:2529247 [3] NCCL INFO Using network Socket +gpub061:2529191:2529247 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub061:2529191:2529247 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub061:2529191:2529247 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub061:2529191:2529247 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub061:2529191:2529247 [3] NCCL INFO Connected all rings +gpub061:2529191:2529247 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub061:2529191:2529247 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub061:2529191:2529247 [3] NCCL INFO Connected all trees +gpub061:2529191:2529247 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2529191:2529247 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2529191:2529247 [3] NCCL INFO comm 0x1456cd30 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub061:2529190:2529190 [2] NCCL INFO cudaDriverVersion 12020 +gpub061:2529190:2529190 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2529190:2529190 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:2529190:2529249 [2] NCCL INFO NET/IB : No device found. 
+gpub061:2529190:2529249 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2529190:2529249 [2] NCCL INFO Using network Socket +gpub061:2529190:2529249 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub061:2529190:2529249 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub061:2529190:2529249 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub061:2529190:2529249 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub061:2529190:2529249 [2] NCCL INFO Connected all rings +gpub061:2529190:2529249 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub061:2529190:2529249 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub061:2529190:2529249 [2] NCCL INFO Connected all trees +gpub061:2529190:2529249 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2529190:2529249 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2529190:2529249 [2] NCCL INFO comm 0x1b848230 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub061:2529189:2529189 [1] NCCL INFO cudaDriverVersion 12020 +gpub061:2529189:2529189 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2529189:2529189 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:2529189:2529250 [1] NCCL INFO NET/IB : No device found. 
+gpub061:2529189:2529250 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2529189:2529250 [1] NCCL INFO Using network Socket +gpub061:2529189:2529250 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub061:2529189:2529250 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub061:2529189:2529250 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub061:2529189:2529250 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub061:2529189:2529250 [1] NCCL INFO Connected all rings +gpub061:2529189:2529250 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub061:2529189:2529250 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub061:2529189:2529250 [1] NCCL INFO Connected all trees +gpub061:2529189:2529250 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2529189:2529250 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2529189:2529250 [1] NCCL INFO comm 0xcf04490 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub060:3090775:3090775 [2] NCCL INFO cudaDriverVersion 12020 +gpub060:3090775:3090775 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0> +gpub060:3090775:3090775 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub060:3090775:3090841 [2] NCCL INFO NET/IB : No device found. 
+gpub060:3090775:3090841 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.160<0> [1]hsn0:141.142.145.160<0> [2]eth0:fe80::60cc:de69:5e72:f184%eth0<0> +gpub060:3090775:3090841 [2] NCCL INFO Using network Socket +gpub060:3090775:3090841 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub060:3090775:3090841 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub060:3090775:3090841 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub060:3090775:3090841 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub060:3090775:3090841 [2] NCCL INFO Connected all rings +gpub060:3090775:3090841 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub060:3090775:3090841 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub060:3090775:3090841 [2] NCCL INFO Connected all trees +gpub060:3090775:3090841 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub060:3090775:3090841 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub060:3090775:3090841 [2] NCCL INFO comm 0x19fb48b0 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub060:3090776:3090776 [3] NCCL INFO cudaDriverVersion 12020 +gpub060:3090776:3090776 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0> +gpub060:3090776:3090776 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub060:3090776:3090840 [3] NCCL INFO NET/IB : No device found. 
+gpub060:3090776:3090840 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.160<0> [1]hsn0:141.142.145.160<0> [2]eth0:fe80::60cc:de69:5e72:f184%eth0<0> +gpub060:3090776:3090840 [3] NCCL INFO Using network Socket +gpub060:3090776:3090840 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub060:3090776:3090840 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub060:3090776:3090840 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub060:3090776:3090840 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub060:3090776:3090840 [3] NCCL INFO Connected all rings +gpub060:3090776:3090840 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub060:3090776:3090840 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub060:3090776:3090840 [3] NCCL INFO Connected all trees +gpub060:3090776:3090840 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub060:3090776:3090840 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub060:3090776:3090840 [3] NCCL INFO comm 0x76cb3130 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub060:3090773:3090773 [0] NCCL INFO cudaDriverVersion 12020 +gpub060:3090773:3090773 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0> +gpub060:3090773:3090773 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub060:3090773:3090839 [0] NCCL INFO NET/IB : No device found. 
+gpub060:3090773:3090839 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.160<0> [1]hsn0:141.142.145.160<0> [2]eth0:fe80::60cc:de69:5e72:f184%eth0<0> +gpub060:3090773:3090839 [0] NCCL INFO Using network Socket +gpub060:3090773:3090839 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub060:3090773:3090839 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub060:3090773:3090839 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub060:3090773:3090839 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub060:3090773:3090839 [0] NCCL INFO Connected all rings +gpub060:3090773:3090839 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/Socket/1 +gpub060:3090773:3090839 [0] NCCL INFO Connected all trees +gpub060:3090773:3090839 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub060:3090773:3090839 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub060:3090773:3090839 [0] NCCL INFO comm 0x17eefe80 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub060:3090774:3090774 [1] NCCL INFO cudaDriverVersion 12020 +gpub060:3090774:3090774 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0> +gpub060:3090774:3090774 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using 
internal implementation +gpub060:3090774:3090838 [1] NCCL INFO NET/IB : No device found. +gpub060:3090774:3090838 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.160<0> [1]hsn0:141.142.145.160<0> [2]eth0:fe80::60cc:de69:5e72:f184%eth0<0> +gpub060:3090774:3090838 [1] NCCL INFO Using network Socket +gpub060:3090774:3090838 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub060:3090774:3090838 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub060:3090774:3090838 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub060:3090774:3090838 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub060:3090774:3090838 [1] NCCL INFO Connected all rings +gpub060:3090774:3090838 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/Socket/1 +gpub060:3090774:3090838 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/Socket/1 +gpub060:3090774:3090838 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub060:3090774:3090838 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub060:3090774:3090838 [1] NCCL INFO Connected all trees +gpub060:3090774:3090838 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub060:3090774:3090838 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub060:3090774:3090838 [1] NCCL INFO comm 0x10c3c050 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub061:2529188:2529188 [0] NCCL INFO cudaDriverVersion 12020 +gpub061:2529188:2529188 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:2529188:2529188 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:2529188:2529248 [0] NCCL INFO NET/IB : No device found. 
+gpub061:2529188:2529248 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.161<0> [1]hsn0:141.142.145.161<0> [2]eth0:fe80::29e2:373:e40d:cf83%eth0<0> +gpub061:2529188:2529248 [0] NCCL INFO Using network Socket +gpub061:2529188:2529248 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub061:2529188:2529248 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub061:2529188:2529248 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub061:2529188:2529248 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub061:2529188:2529248 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub061:2529188:2529248 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub061:2529188:2529248 [0] NCCL INFO Connected all rings +gpub061:2529188:2529248 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub061:2529188:2529248 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub061:2529188:2529248 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/Socket/1 +gpub061:2529188:2529248 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/Socket/1 +gpub061:2529188:2529248 [0] NCCL INFO Connected all trees +gpub061:2529188:2529248 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub061:2529188:2529248 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:2529188:2529248 [0] NCCL INFO comm 0x78945ad0 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +[gpub058:0/16] 2024-02-04 17:46:29,106 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. +[gpub058:0/16] 2024-02-04 17:49:01,736 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-04 17:49:18,251 (trainer:740) INFO: 27epoch:train:1-100batch: iter_time=1.259, forward_time=0.499, loss_ctc=54.760, loss_att=57.381, acc=0.711, loss=56.595, backward_time=0.312, grad_norm=43.713, clip=100.000, loss_scale=4.851e+33, optim_step_time=0.096, optim0_lr0=1.961e-04, train_time=4.622 +[gpub058:0/16] 2024-02-04 17:52:06,083 (trainer:740) INFO: 27epoch:train:101-200batch: iter_time=9.253e-05, forward_time=0.382, loss_ctc=52.047, loss_att=60.543, acc=0.727, loss=57.994, backward_time=0.328, grad_norm=41.866, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.961e-04, train_time=1.693 +[gpub058:0/16] 2024-02-04 17:54:35,625 (trainer:740) INFO: 27epoch:train:201-300batch: iter_time=9.590e-05, forward_time=0.393, loss_ctc=49.940, loss_att=44.887, acc=0.762, loss=46.403, backward_time=0.380, grad_norm=37.811, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.961e-04, train_time=1.494 +[gpub058:0/16] 2024-02-04 17:56:52,271 (trainer:740) INFO: 27epoch:train:301-400batch: iter_time=9.816e-05, forward_time=0.308, loss_ctc=47.556, loss_att=43.425, acc=0.775, loss=44.665, backward_time=0.296, grad_norm=36.856, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.960e-04, train_time=1.367 +[gpub058:0/16] 2024-02-04 17:59:33,295 (trainer:740) INFO: 27epoch:train:401-500batch: iter_time=9.924e-05, forward_time=0.411, loss_ctc=46.270, loss_att=43.206, acc=0.761, loss=44.125, backward_time=0.314, grad_norm=35.962, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.960e-04, train_time=1.610 +[gpub058:0/16] 2024-02-04 18:02:32,042 (trainer:740) INFO: 27epoch:train:501-600batch: iter_time=8.737e-05, forward_time=0.372, loss_ctc=42.226, loss_att=36.180, acc=0.761, loss=37.994, backward_time=0.328, grad_norm=35.579, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.960e-04, train_time=1.787 +[gpub058:0/16] 2024-02-04 18:05:14,571 (trainer:740) INFO: 
27epoch:train:601-700batch: iter_time=6.522e-04, forward_time=0.415, loss_ctc=48.940, loss_att=40.745, acc=0.751, loss=43.203, backward_time=0.335, grad_norm=39.062, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.102, optim0_lr0=1.960e-04, train_time=1.626 +[gpub058:0/16] 2024-02-04 18:08:00,034 (trainer:740) INFO: 27epoch:train:701-800batch: iter_time=8.653e-05, forward_time=0.331, loss_ctc=50.212, loss_att=49.336, acc=0.744, loss=49.599, backward_time=0.315, grad_norm=38.879, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.959e-04, train_time=1.653 +[gpub058:0/16] 2024-02-04 18:11:22,267 (trainer:740) INFO: 27epoch:train:801-900batch: iter_time=2.474e-04, forward_time=0.395, loss_ctc=40.518, loss_att=42.937, acc=0.748, loss=42.211, backward_time=0.313, grad_norm=36.438, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.959e-04, train_time=2.024 +[gpub058:0/16] 2024-02-04 18:14:18,745 (trainer:740) INFO: 27epoch:train:901-1000batch: iter_time=8.188e-05, forward_time=0.348, loss_ctc=51.677, loss_att=50.205, acc=0.721, loss=50.646, backward_time=0.321, grad_norm=42.830, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.959e-04, train_time=1.764 +[gpub058:0/16] 2024-02-04 18:17:31,879 (trainer:740) INFO: 27epoch:train:1001-1100batch: iter_time=2.029e-04, forward_time=0.325, loss_ctc=51.675, loss_att=43.353, acc=0.754, loss=45.850, backward_time=0.305, grad_norm=42.824, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.959e-04, train_time=1.932 +[gpub058:0/16] 2024-02-04 18:20:47,875 (trainer:740) INFO: 27epoch:train:1101-1200batch: iter_time=3.151e-04, forward_time=0.413, loss_ctc=49.734, loss_att=50.180, acc=0.740, loss=50.046, backward_time=0.349, grad_norm=37.476, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.958e-04, train_time=1.959 +[gpub058:0/16] 2024-02-04 18:22:32,279 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub058:0/16] 2024-02-04 18:22:51,051 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 18:22:54,492 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 18:22:54,492 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub058:0/16] 2024-02-04 18:22:54,747 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 18:29:07,987 (trainer:740) INFO: 27epoch:train:1201-1300batch: iter_time=3.313, forward_time=0.291, loss_ctc=51.249, loss_att=51.175, acc=0.723, loss=51.197, backward_time=0.283, grad_norm=45.196, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=1.958e-04, train_time=5.001 +[gpub058:0/16] 2024-02-04 18:31:55,425 (trainer:740) INFO: 27epoch:train:1301-1400batch: iter_time=9.312e-05, forward_time=0.386, loss_ctc=55.860, loss_att=61.776, acc=0.713, loss=60.001, backward_time=0.340, grad_norm=44.908, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.958e-04, train_time=1.675 +[gpub058:0/16] 2024-02-04 18:34:04,963 (trainer:740) INFO: 27epoch:train:1401-1500batch: iter_time=9.067e-05, forward_time=0.291, loss_ctc=50.349, loss_att=52.601, acc=0.763, loss=51.925, backward_time=0.287, grad_norm=37.103, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=1.958e-04, train_time=1.294 +[gpub058:0/16] 2024-02-04 18:36:32,926 (trainer:740) INFO: 
27epoch:train:1501-1600batch: iter_time=9.723e-05, forward_time=0.286, loss_ctc=43.794, loss_att=42.500, acc=0.766, loss=42.888, backward_time=0.282, grad_norm=35.945, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.957e-04, train_time=1.481 +[gpub058:0/16] 2024-02-04 18:39:25,279 (trainer:740) INFO: 27epoch:train:1601-1700batch: iter_time=5.263e-04, forward_time=0.599, loss_ctc=48.221, loss_att=42.708, acc=0.769, loss=44.362, backward_time=0.384, grad_norm=38.115, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.106, optim0_lr0=1.957e-04, train_time=1.723 +[gpub058:0/16] 2024-02-04 18:42:10,855 (trainer:740) INFO: 27epoch:train:1701-1800batch: iter_time=9.247e-05, forward_time=0.288, loss_ctc=42.209, loss_att=38.665, acc=0.769, loss=39.728, backward_time=0.281, grad_norm=32.795, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=1.957e-04, train_time=1.656 +[gpub058:0/16] 2024-02-04 18:44:08,114 (trainer:740) INFO: 27epoch:train:1801-1900batch: iter_time=8.968e-05, forward_time=0.289, loss_ctc=44.425, loss_att=36.016, acc=0.779, loss=38.539, backward_time=0.285, grad_norm=31.109, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.091, optim0_lr0=1.957e-04, train_time=1.170 +[gpub058:0/16] 2024-02-04 18:46:36,850 (trainer:740) INFO: 27epoch:train:1901-2000batch: iter_time=9.426e-05, forward_time=0.311, loss_ctc=50.135, loss_att=49.121, acc=0.740, loss=49.425, backward_time=0.304, grad_norm=37.788, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.956e-04, train_time=1.489 +[gpub058:0/16] 2024-02-04 18:49:11,140 (trainer:740) INFO: 27epoch:train:2001-2100batch: iter_time=4.084e-04, forward_time=0.369, loss_ctc=47.112, loss_att=46.943, acc=0.756, loss=46.994, backward_time=0.333, grad_norm=40.023, clip=100.000, loss_scale=2.934e+33, optim_step_time=0.100, optim0_lr0=1.956e-04, train_time=1.543 +[gpub058:0/16] 2024-02-04 18:51:23,729 (trainer:740) INFO: 27epoch:train:2101-2200batch: 
iter_time=8.695e-05, forward_time=0.291, loss_ctc=45.555, loss_att=48.771, acc=0.750, loss=47.806, backward_time=0.286, grad_norm=37.583, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.956e-04, train_time=1.326 +[gpub058:0/16] 2024-02-04 18:53:24,377 (trainer:740) INFO: 27epoch:train:2201-2300batch: iter_time=9.214e-05, forward_time=0.289, loss_ctc=48.350, loss_att=44.144, acc=0.739, loss=45.405, backward_time=0.285, grad_norm=40.318, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.956e-04, train_time=1.206 +[gpub058:0/16] 2024-02-04 18:56:03,532 (trainer:740) INFO: 27epoch:train:2301-2400batch: iter_time=1.731e-04, forward_time=0.416, loss_ctc=48.962, loss_att=44.420, acc=0.752, loss=45.783, backward_time=0.324, grad_norm=37.732, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.955e-04, train_time=1.592 +[gpub058:0/16] 2024-02-04 18:58:10,822 (trainer:740) INFO: 27epoch:train:2401-2500batch: iter_time=9.135e-05, forward_time=0.318, loss_ctc=51.647, loss_att=52.544, acc=0.732, loss=52.275, backward_time=0.286, grad_norm=40.293, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.955e-04, train_time=1.273 +[gpub058:0/16] 2024-02-04 18:58:31,324 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub058:0/16] 2024-02-04 18:58:49,382 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 18:58:52,798 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 18:58:52,799 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub058:0/16] 2024-02-04 18:58:52,802 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 19:05:54,022 (trainer:740) INFO: 27epoch:train:2501-2600batch: iter_time=3.347, forward_time=0.289, loss_ctc=53.346, loss_att=55.702, acc=0.723, loss=54.995, backward_time=0.286, grad_norm=42.803, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.955e-04, train_time=4.632 +[gpub058:0/16] 2024-02-04 19:08:30,635 (trainer:740) INFO: 27epoch:train:2601-2700batch: iter_time=9.266e-05, forward_time=0.315, loss_ctc=50.976, loss_att=59.311, acc=0.737, loss=56.810, backward_time=0.300, grad_norm=40.005, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.955e-04, train_time=1.565 +[gpub058:0/16] 2024-02-04 19:10:44,426 (trainer:740) INFO: 27epoch:train:2701-2800batch: iter_time=9.445e-05, forward_time=0.385, loss_ctc=48.436, loss_att=44.084, acc=0.772, loss=45.390, backward_time=0.319, grad_norm=41.159, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.954e-04, train_time=1.339 +[gpub058:0/16] 2024-02-04 19:12:42,198 (trainer:740) INFO: 
27epoch:train:2801-2900batch: iter_time=8.516e-05, forward_time=0.292, loss_ctc=46.797, loss_att=43.060, acc=0.782, loss=44.181, backward_time=0.288, grad_norm=36.376, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.954e-04, train_time=1.178 +[gpub058:0/16] 2024-02-04 19:15:00,039 (trainer:740) INFO: 27epoch:train:2901-3000batch: iter_time=9.560e-05, forward_time=0.292, loss_ctc=45.244, loss_att=42.673, acc=0.768, loss=43.444, backward_time=0.286, grad_norm=35.347, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.954e-04, train_time=1.378 +[gpub058:0/16] 2024-02-04 19:17:05,493 (trainer:740) INFO: 27epoch:train:3001-3100batch: iter_time=9.676e-05, forward_time=0.292, loss_ctc=41.817, loss_att=36.441, acc=0.765, loss=38.053, backward_time=0.287, grad_norm=33.465, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.954e-04, train_time=1.254 +[gpub058:0/16] 2024-02-04 19:19:19,073 (trainer:740) INFO: 27epoch:train:3101-3200batch: iter_time=9.224e-05, forward_time=0.371, loss_ctc=47.801, loss_att=39.572, acc=0.765, loss=42.041, backward_time=0.337, grad_norm=34.775, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.953e-04, train_time=1.335 +[gpub058:0/16] 2024-02-04 19:21:26,874 (trainer:740) INFO: 27epoch:train:3201-3300batch: iter_time=9.980e-05, forward_time=0.310, loss_ctc=49.109, loss_att=48.131, acc=0.756, loss=48.424, backward_time=0.286, grad_norm=37.791, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.953e-04, train_time=1.278 +[gpub058:0/16] 2024-02-04 19:23:37,270 (trainer:740) INFO: 27epoch:train:3301-3400batch: iter_time=9.845e-05, forward_time=0.288, loss_ctc=39.531, loss_att=42.247, acc=0.762, loss=41.432, backward_time=0.282, grad_norm=35.559, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.953e-04, train_time=1.304 +[gpub058:0/16] 2024-02-04 19:25:41,983 (trainer:740) INFO: 27epoch:train:3401-3500batch: 
iter_time=9.335e-05, forward_time=0.291, loss_ctc=49.778, loss_att=50.103, acc=0.729, loss=50.005, backward_time=0.291, grad_norm=43.365, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.953e-04, train_time=1.247 +[gpub058:0/16] 2024-02-04 19:28:09,153 (trainer:740) INFO: 27epoch:train:3501-3600batch: iter_time=8.601e-05, forward_time=0.401, loss_ctc=50.371, loss_att=44.242, acc=0.760, loss=46.080, backward_time=0.365, grad_norm=39.364, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.952e-04, train_time=1.471 +[gpub058:0/16] 2024-02-04 19:30:18,548 (trainer:740) INFO: 27epoch:train:3601-3700batch: iter_time=8.764e-05, forward_time=0.300, loss_ctc=48.495, loss_att=49.907, acc=0.747, loss=49.483, backward_time=0.289, grad_norm=35.990, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.952e-04, train_time=1.294 +[gpub058:0/16] 2024-02-04 19:31:43,939 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub058:0/16] 2024-02-04 19:32:02,455 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 19:32:06,125 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 19:32:06,126 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-04 19:32:06,151 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 19:38:17,946 
(trainer:740) INFO: 27epoch:train:3701-3800batch: iter_time=3.257, forward_time=0.292, loss_ctc=49.495, loss_att=50.450, acc=0.725, loss=50.164, backward_time=0.283, grad_norm=41.445, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.952e-04, train_time=4.794 +[gpub058:0/16] 2024-02-04 19:41:07,603 (trainer:740) INFO: 27epoch:train:3801-3900batch: iter_time=7.794e-04, forward_time=0.415, loss_ctc=54.969, loss_att=61.843, acc=0.710, loss=59.781, backward_time=0.356, grad_norm=44.436, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.952e-04, train_time=1.696 +[gpub058:0/16] 2024-02-04 19:43:18,499 (trainer:740) INFO: 27epoch:train:3901-4000batch: iter_time=7.772e-05, forward_time=0.300, loss_ctc=49.968, loss_att=53.136, acc=0.758, loss=52.186, backward_time=0.289, grad_norm=38.625, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.951e-04, train_time=1.309 +[gpub058:0/16] 2024-02-04 19:45:28,498 (trainer:740) INFO: 27epoch:train:4001-4100batch: iter_time=7.967e-05, forward_time=0.288, loss_ctc=43.648, loss_att=41.721, acc=0.770, loss=42.299, backward_time=0.283, grad_norm=37.502, clip=100.000, loss_scale=5.867e+33, optim_step_time=0.091, optim0_lr0=1.951e-04, train_time=1.300 +[gpub058:0/16] 2024-02-04 19:47:56,579 (trainer:740) INFO: 27epoch:train:4101-4200batch: iter_time=8.188e-05, forward_time=0.319, loss_ctc=48.695, loss_att=42.322, acc=0.764, loss=44.234, backward_time=0.288, grad_norm=39.268, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.951e-04, train_time=1.481 +[gpub058:0/16] 2024-02-04 19:50:30,564 (trainer:740) INFO: 27epoch:train:4201-4300batch: iter_time=8.729e-05, forward_time=0.394, loss_ctc=41.588, loss_att=37.690, acc=0.777, loss=38.859, backward_time=0.365, grad_norm=32.277, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.951e-04, train_time=1.540 +[gpub058:0/16] 2024-02-04 19:53:02,997 (trainer:740) INFO: 
27epoch:train:4301-4400batch: iter_time=8.443e-05, forward_time=0.299, loss_ctc=44.604, loss_att=36.181, acc=0.773, loss=38.708, backward_time=0.284, grad_norm=32.926, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.950e-04, train_time=1.524 +[gpub058:0/16] 2024-02-04 19:55:10,590 (trainer:740) INFO: 27epoch:train:4401-4500batch: iter_time=8.660e-05, forward_time=0.292, loss_ctc=49.839, loss_att=47.597, acc=0.737, loss=48.270, backward_time=0.285, grad_norm=39.092, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.950e-04, train_time=1.276 +[gpub058:0/16] 2024-02-04 19:57:35,074 (trainer:740) INFO: 27epoch:train:4501-4600batch: iter_time=8.498e-05, forward_time=0.291, loss_ctc=46.598, loss_att=46.465, acc=0.749, loss=46.505, backward_time=0.285, grad_norm=39.171, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.950e-04, train_time=1.444 +[gpub058:0/16] 2024-02-04 20:00:07,152 (trainer:740) INFO: 27epoch:train:4601-4700batch: iter_time=8.447e-05, forward_time=0.381, loss_ctc=45.107, loss_att=49.453, acc=0.738, loss=48.149, backward_time=0.314, grad_norm=37.534, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.109, optim0_lr0=1.950e-04, train_time=1.521 +[gpub058:0/16] 2024-02-04 20:02:40,088 (trainer:740) INFO: 27epoch:train:4701-4800batch: iter_time=8.701e-05, forward_time=0.325, loss_ctc=47.091, loss_att=41.041, acc=0.743, loss=42.856, backward_time=0.292, grad_norm=40.113, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.950e-04, train_time=1.529 +[gpub058:0/16] 2024-02-04 20:04:57,056 (trainer:740) INFO: 27epoch:train:4801-4900batch: iter_time=8.643e-05, forward_time=0.297, loss_ctc=48.147, loss_att=43.772, acc=0.748, loss=45.085, backward_time=0.284, grad_norm=37.569, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.949e-04, train_time=1.369 +[gpub058:0/16] 2024-02-04 20:07:36,729 (trainer:740) INFO: 27epoch:train:4901-5000batch: 
iter_time=8.753e-05, forward_time=0.291, loss_ctc=51.270, loss_att=53.018, acc=0.724, loss=52.494, backward_time=0.286, grad_norm=41.416, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=1.949e-04, train_time=1.596 +[gpub058:0/16] 2024-02-04 20:07:56,772 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub058:0/16] 2024-02-04 20:08:15,182 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 20:08:18,652 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 20:08:18,653 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub058:0/16] 2024-02-04 20:08:18,770 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 20:24:17,509 (trainer:740) INFO: 27epoch:train:5001-5100batch: iter_time=3.606, forward_time=0.453, loss_ctc=52.840, loss_att=56.628, acc=0.723, loss=55.492, backward_time=0.307, grad_norm=42.503, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.949e-04, train_time=10.009 +[gpub058:0/16] 2024-02-04 20:29:29,875 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-04 20:33:15,845 (trainer:740) INFO: 27epoch:train:5101-5200batch: iter_time=9.567e-05, forward_time=0.289, loss_ctc=50.422, loss_att=59.089, acc=0.742, loss=56.489, backward_time=0.285, grad_norm=38.492, clip=100.000, loss_scale=8.339e+33, optim_step_time=0.092, optim0_lr0=1.949e-04, train_time=5.383 +[gpub058:0/16] 2024-02-04 20:45:43,751 (trainer:740) INFO: 27epoch:train:5201-5300batch: iter_time=9.848e-05, forward_time=0.289, loss_ctc=48.239, loss_att=44.160, acc=0.772, loss=45.384, backward_time=0.286, grad_norm=37.780, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.948e-04, train_time=7.479 +[gpub058:0/16] 2024-02-04 21:00:33,989 (trainer:740) INFO: 27epoch:train:5301-5400batch: iter_time=9.130e-05, forward_time=0.370, loss_ctc=46.139, loss_att=42.616, acc=0.785, loss=43.673, backward_time=0.421, grad_norm=35.079, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.948e-04, train_time=8.901 +[gpub058:0/16] 2024-02-04 21:14:18,330 (trainer:740) INFO: 27epoch:train:5401-5500batch: iter_time=9.172e-05, forward_time=0.289, loss_ctc=44.952, loss_att=43.130, acc=0.768, loss=43.677, backward_time=0.280, grad_norm=36.253, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.948e-04, train_time=8.245 +[gpub058:0/16] 2024-02-04 21:20:01,103 (trainer:740) INFO: 27epoch:train:5501-5600batch: iter_time=9.072e-05, forward_time=0.298, loss_ctc=41.714, loss_att=36.119, acc=0.767, loss=37.798, backward_time=0.288, grad_norm=32.405, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.948e-04, train_time=3.427 +[gpub058:0/16] 2024-02-04 21:30:52,773 (trainer:740) INFO: 27epoch:train:5601-5700batch: iter_time=8.951e-05, forward_time=0.287, loss_ctc=47.793, loss_att=39.825, acc=0.764, loss=42.216, backward_time=0.280, grad_norm=35.367, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.947e-04, train_time=6.516 +[gpub058:0/16] 2024-02-04 21:44:00,225 
(trainer:740) INFO: 27epoch:train:5701-5800batch: iter_time=9.916e-05, forward_time=0.407, loss_ctc=49.260, loss_att=48.744, acc=0.754, loss=48.899, backward_time=0.315, grad_norm=37.406, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.947e-04, train_time=7.874 +[gpub058:0/16] 2024-02-04 21:56:05,443 (trainer:740) INFO: 27epoch:train:5801-5900batch: iter_time=9.465e-05, forward_time=0.285, loss_ctc=39.565, loss_att=42.007, acc=0.765, loss=41.275, backward_time=0.278, grad_norm=35.548, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.947e-04, train_time=7.253 +[gpub058:0/16] 2024-02-04 22:04:40,096 (trainer:740) INFO: 27epoch:train:5901-6000batch: iter_time=1.109e-04, forward_time=0.297, loss_ctc=49.674, loss_att=49.113, acc=0.732, loss=49.281, backward_time=0.284, grad_norm=43.267, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.947e-04, train_time=5.146 +[gpub058:0/16] 2024-02-04 22:20:41,611 (trainer:740) INFO: 27epoch:train:6001-6100batch: iter_time=1.088e-04, forward_time=0.288, loss_ctc=49.440, loss_att=43.808, acc=0.761, loss=45.498, backward_time=0.278, grad_norm=39.326, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.946e-04, train_time=9.615 +[gpub058:0/16] 2024-02-04 22:27:58,534 (trainer:740) INFO: 27epoch:train:6101-6200batch: iter_time=1.177e-04, forward_time=0.429, loss_ctc=48.177, loss_att=49.691, acc=0.751, loss=49.237, backward_time=0.310, grad_norm=36.868, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.946e-04, train_time=4.369 +[gpub058:0/16] 2024-02-04 22:37:04,354 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub058:0/16] 2024-02-04 22:37:23,182 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 22:37:26,731 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 22:37:26,731 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub058:0/16] 2024-02-04 22:37:26,863 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 22:45:02,961 (trainer:740) INFO: 27epoch:train:6201-6300batch: iter_time=4.118, forward_time=0.472, loss_ctc=49.468, loss_att=49.098, acc=0.732, loss=49.209, backward_time=0.308, grad_norm=44.876, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.946e-04, train_time=10.244 +[gpub058:0/16] 2024-02-04 22:48:01,297 (trainer:740) INFO: 27epoch:train:6301-6400batch: iter_time=8.757e-05, forward_time=0.291, loss_ctc=55.243, loss_att=60.623, acc=0.718, loss=59.009, backward_time=0.285, grad_norm=43.489, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.946e-04, train_time=1.784 +[gpub058:0/16] 2024-02-04 22:50:57,413 (trainer:740) INFO: 27epoch:train:6401-6500batch: iter_time=8.880e-05, forward_time=0.309, loss_ctc=49.391, loss_att=51.735, acc=0.766, loss=51.032, backward_time=0.291, grad_norm=38.171, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.945e-04, train_time=1.761 +[gpub058:0/16] 2024-02-04 22:53:11,905 (trainer:740) 
INFO: 27epoch:train:6501-6600batch: iter_time=9.475e-05, forward_time=0.433, loss_ctc=43.298, loss_att=41.709, acc=0.773, loss=42.186, backward_time=0.329, grad_norm=35.465, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.945e-04, train_time=1.345 +[gpub058:0/16] 2024-02-04 22:55:33,018 (trainer:740) INFO: 27epoch:train:6601-6700batch: iter_time=1.006e-04, forward_time=0.291, loss_ctc=47.683, loss_att=41.519, acc=0.775, loss=43.369, backward_time=0.288, grad_norm=38.923, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.945e-04, train_time=1.411 +[gpub058:0/16] 2024-02-04 22:58:16,613 (trainer:740) INFO: 27epoch:train:6701-6800batch: iter_time=9.617e-05, forward_time=0.287, loss_ctc=41.155, loss_att=38.434, acc=0.773, loss=39.251, backward_time=0.281, grad_norm=31.720, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.945e-04, train_time=1.636 +[gpub058:0/16] 2024-02-04 23:01:23,056 (trainer:740) INFO: 27epoch:train:6801-6900batch: iter_time=3.646e-04, forward_time=0.430, loss_ctc=44.262, loss_att=35.702, acc=0.781, loss=38.270, backward_time=0.354, grad_norm=32.771, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.944e-04, train_time=1.864 +[gpub058:0/16] 2024-02-04 23:03:21,115 (trainer:740) INFO: 27epoch:train:6901-7000batch: iter_time=9.047e-05, forward_time=0.294, loss_ctc=49.561, loss_att=47.533, acc=0.747, loss=48.142, backward_time=0.286, grad_norm=38.564, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.944e-04, train_time=1.180 +[gpub058:0/16] 2024-02-04 23:05:30,859 (trainer:740) INFO: 27epoch:train:7001-7100batch: iter_time=9.077e-05, forward_time=0.292, loss_ctc=46.351, loss_att=45.805, acc=0.760, loss=45.969, backward_time=0.289, grad_norm=38.791, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.944e-04, train_time=1.297 +[gpub058:0/16] 2024-02-04 23:08:19,747 (trainer:740) INFO: 27epoch:train:7101-7200batch: 
iter_time=9.027e-05, forward_time=0.341, loss_ctc=45.270, loss_att=49.709, acc=0.747, loss=48.378, backward_time=0.284, grad_norm=37.582, clip=100.000, loss_scale=7.217e+33, optim_step_time=0.092, optim0_lr0=1.944e-04, train_time=1.689 +[gpub058:0/16] 2024-02-04 23:10:54,330 (trainer:740) INFO: 27epoch:train:7201-7300batch: iter_time=9.100e-05, forward_time=0.345, loss_ctc=46.818, loss_att=42.804, acc=0.743, loss=44.008, backward_time=0.387, grad_norm=39.769, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.943e-04, train_time=1.546 +[gpub058:0/16] 2024-02-04 23:13:15,132 (trainer:740) INFO: 27epoch:train:7301-7400batch: iter_time=9.934e-05, forward_time=0.290, loss_ctc=47.672, loss_att=43.402, acc=0.756, loss=44.683, backward_time=0.285, grad_norm=37.917, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.943e-04, train_time=1.408 +[gpub058:0/16] 2024-02-04 23:15:30,658 (trainer:740) INFO: 27epoch:train:7401-7500batch: iter_time=8.038e-05, forward_time=0.290, loss_ctc=51.333, loss_att=52.747, acc=0.732, loss=52.323, backward_time=0.284, grad_norm=42.071, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.943e-04, train_time=1.355 +[gpub058:0/16] 2024-02-04 23:15:50,698 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub058:0/16] 2024-02-04 23:16:09,666 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 23:16:13,223 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 23:16:13,223 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub058:0/16] 2024-02-04 23:16:13,272 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 23:24:11,535 (trainer:740) INFO: 27epoch:train:7501-7600batch: iter_time=3.754, forward_time=0.461, loss_ctc=52.606, loss_att=56.716, acc=0.717, loss=55.483, backward_time=0.319, grad_norm=43.217, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.943e-04, train_time=5.209 +[gpub058:0/16] 2024-02-04 23:26:27,385 (trainer:740) INFO: 27epoch:train:7601-7700batch: iter_time=8.868e-05, forward_time=0.292, loss_ctc=50.597, loss_att=59.666, acc=0.731, loss=56.945, backward_time=0.287, grad_norm=41.802, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.942e-04, train_time=1.358 +[gpub058:0/16] 2024-02-04 23:27:23,880 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-04 23:29:02,701 (trainer:740) INFO: 27epoch:train:7701-7800batch: iter_time=8.741e-05, forward_time=0.289, loss_ctc=47.787, loss_att=43.932, acc=0.770, loss=45.088, backward_time=0.283, grad_norm=39.838, clip=100.000, loss_scale=6.923e+33, optim_step_time=0.092, optim0_lr0=1.942e-04, train_time=1.553 +[gpub058:0/16] 2024-02-04 23:31:20,686 (trainer:740) INFO: 27epoch:train:7801-7900batch: iter_time=6.298e-04, forward_time=0.444, loss_ctc=46.266, loss_att=43.203, acc=0.780, loss=44.122, backward_time=0.349, grad_norm=35.582, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.942e-04, train_time=1.380 +[gpub058:0/16] 2024-02-04 23:33:59,203 (trainer:740) INFO: 27epoch:train:7901-8000batch: iter_time=8.727e-05, forward_time=0.316, loss_ctc=44.586, loss_att=42.530, acc=0.766, loss=43.147, backward_time=0.283, grad_norm=35.780, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.942e-04, train_time=1.585 +[gpub058:0/16] 2024-02-04 23:36:01,264 (trainer:740) INFO: 27epoch:train:8001-8100batch: iter_time=9.342e-05, forward_time=0.287, loss_ctc=41.219, loss_att=35.291, acc=0.768, loss=37.069, backward_time=0.281, grad_norm=32.225, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.941e-04, train_time=1.220 +[gpub058:0/16] 2024-02-04 23:38:39,300 (trainer:740) INFO: 27epoch:train:8101-8200batch: iter_time=9.679e-05, forward_time=0.413, loss_ctc=47.047, loss_att=39.747, acc=0.757, loss=41.937, backward_time=0.336, grad_norm=35.298, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.941e-04, train_time=1.580 +[gpub058:0/16] 2024-02-04 23:41:12,926 (trainer:740) INFO: 27epoch:train:8201-8300batch: iter_time=9.884e-05, forward_time=0.289, loss_ctc=48.676, loss_att=48.025, acc=0.750, loss=48.220, backward_time=0.284, grad_norm=37.629, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.941e-04, train_time=1.536 +[gpub058:0/16] 2024-02-04 23:43:10,309 
(trainer:740) INFO: 27epoch:train:8301-8400batch: iter_time=8.890e-05, forward_time=0.287, loss_ctc=39.026, loss_att=42.075, acc=0.755, loss=41.160, backward_time=0.282, grad_norm=34.979, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.941e-04, train_time=1.174 +[gpub058:0/16] 2024-02-04 23:45:31,055 (trainer:740) INFO: 27epoch:train:8401-8500batch: iter_time=9.294e-05, forward_time=0.367, loss_ctc=48.934, loss_att=48.889, acc=0.727, loss=48.902, backward_time=0.325, grad_norm=41.105, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.940e-04, train_time=1.407 +[gpub058:0/16] 2024-02-04 23:48:35,548 (trainer:740) INFO: 27epoch:train:8501-8600batch: iter_time=9.397e-05, forward_time=0.332, loss_ctc=49.032, loss_att=42.099, acc=0.759, loss=44.179, backward_time=0.344, grad_norm=39.451, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.940e-04, train_time=1.845 +[gpub058:0/16] 2024-02-04 23:50:32,259 (trainer:740) INFO: 27epoch:train:8601-8700batch: iter_time=8.963e-05, forward_time=0.291, loss_ctc=48.097, loss_att=49.590, acc=0.744, loss=49.142, backward_time=0.286, grad_norm=37.073, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.940e-04, train_time=1.167 +[gpub058:0/16] 2024-02-04 23:51:46,550 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub058:0/16] 2024-02-04 23:52:05,397 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-04 23:52:08,896 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-04 23:52:08,896 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub058:0/16] 2024-02-04 23:52:08,927 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-04 23:58:50,856 (trainer:740) INFO: 27epoch:train:8701-8800batch: iter_time=3.557, forward_time=0.427, loss_ctc=48.828, loss_att=48.665, acc=0.725, loss=48.714, backward_time=0.305, grad_norm=41.706, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.940e-04, train_time=4.986 +[gpub058:0/16] 2024-02-05 00:01:36,620 (trainer:740) INFO: 27epoch:train:8801-8900batch: iter_time=0.002, forward_time=0.395, loss_ctc=54.646, loss_att=59.902, acc=0.715, loss=58.325, backward_time=0.360, grad_norm=44.047, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.939e-04, train_time=1.657 +[gpub058:0/16] 2024-02-05 00:03:48,726 (trainer:740) INFO: 27epoch:train:8901-9000batch: iter_time=7.408e-05, forward_time=0.432, loss_ctc=48.968, loss_att=52.023, acc=0.761, loss=51.107, backward_time=0.334, grad_norm=39.053, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.939e-04, train_time=1.321 +[gpub058:0/16] 2024-02-05 00:06:03,731 (trainer:740) INFO: 
27epoch:train:9001-9100batch: iter_time=9.361e-05, forward_time=0.301, loss_ctc=43.436, loss_att=41.445, acc=0.773, loss=42.042, backward_time=0.288, grad_norm=34.271, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.939e-04, train_time=1.350 +[gpub058:0/16] 2024-02-05 00:08:40,216 (trainer:740) INFO: 27epoch:train:9101-9200batch: iter_time=8.740e-04, forward_time=0.400, loss_ctc=46.463, loss_att=41.119, acc=0.769, loss=42.722, backward_time=0.326, grad_norm=37.979, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.939e-04, train_time=1.565 +[gpub058:0/16] 2024-02-05 00:10:53,592 (trainer:740) INFO: 27epoch:train:9201-9300batch: iter_time=0.001, forward_time=0.386, loss_ctc=40.863, loss_att=37.422, acc=0.779, loss=38.455, backward_time=0.380, grad_norm=31.772, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.112, optim0_lr0=1.938e-04, train_time=1.334 +[gpub058:0/16] 2024-02-05 00:13:14,082 (trainer:740) INFO: 27epoch:train:9301-9400batch: iter_time=9.001e-05, forward_time=0.313, loss_ctc=44.349, loss_att=35.843, acc=0.775, loss=38.395, backward_time=0.295, grad_norm=31.976, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.938e-04, train_time=1.405 +[gpub058:0/16] 2024-02-05 00:16:02,101 (trainer:740) INFO: 27epoch:train:9401-9500batch: iter_time=9.264e-05, forward_time=0.402, loss_ctc=48.737, loss_att=46.751, acc=0.741, loss=47.347, backward_time=0.302, grad_norm=38.001, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.938e-04, train_time=1.680 +[gpub058:0/16] 2024-02-05 00:18:20,563 (trainer:740) INFO: 27epoch:train:9501-9600batch: iter_time=1.547e-04, forward_time=0.423, loss_ctc=45.993, loss_att=45.411, acc=0.751, loss=45.586, backward_time=0.330, grad_norm=39.209, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=1.938e-04, train_time=1.380 +[gpub058:0/16] 2024-02-05 00:20:41,977 (trainer:740) INFO: 27epoch:train:9601-9700batch: 
iter_time=2.452e-04, forward_time=0.433, loss_ctc=44.416, loss_att=48.835, acc=0.740, loss=47.509, backward_time=0.309, grad_norm=37.864, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.938e-04, train_time=1.418 +[gpub058:0/16] 2024-02-05 00:23:15,926 (trainer:740) INFO: 27epoch:train:9701-9800batch: iter_time=8.279e-05, forward_time=0.316, loss_ctc=46.660, loss_att=40.736, acc=0.745, loss=42.513, backward_time=0.310, grad_norm=41.136, clip=100.000, loss_scale=8.619e+33, optim_step_time=0.093, optim0_lr0=1.937e-04, train_time=1.537 +[gpub058:0/16] 2024-02-05 00:25:57,910 (trainer:740) INFO: 27epoch:train:9801-9900batch: iter_time=7.196e-04, forward_time=0.436, loss_ctc=47.420, loss_att=43.323, acc=0.750, loss=44.552, backward_time=0.334, grad_norm=38.265, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.937e-04, train_time=1.622 +[gpub058:0/16] 2024-02-05 00:28:13,710 (trainer:740) INFO: 27epoch:train:9901-10000batch: iter_time=0.001, forward_time=0.438, loss_ctc=49.975, loss_att=52.687, acc=0.726, loss=51.873, backward_time=0.356, grad_norm=41.545, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=1.937e-04, train_time=1.358 +[gpub058:0/16] 2024-02-05 00:28:34,013 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub058:0/16] 2024-02-05 00:28:52,654 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 00:28:56,175 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 00:28:56,175 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub058:0/16] 2024-02-05 00:28:56,209 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 00:37:29,309 (trainer:740) INFO: 27epoch:train:10001-10100batch: iter_time=4.108, forward_time=0.290, loss_ctc=52.018, loss_att=56.784, acc=0.724, loss=55.355, backward_time=0.286, grad_norm=42.709, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.937e-04, train_time=5.556 +[gpub058:0/16] 2024-02-05 00:39:44,464 (trainer:740) INFO: 27epoch:train:10101-10200batch: iter_time=8.630e-05, forward_time=0.293, loss_ctc=50.317, loss_att=59.473, acc=0.742, loss=56.726, backward_time=0.288, grad_norm=38.406, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=1.936e-04, train_time=1.351 +[gpub058:0/16] 2024-02-05 00:42:25,287 (trainer:740) INFO: 27epoch:train:10201-10300batch: iter_time=8.770e-05, forward_time=0.418, loss_ctc=47.414, loss_att=43.966, acc=0.772, loss=45.000, backward_time=0.345, grad_norm=39.107, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=1.936e-04, train_time=1.608 +[gpub058:0/16] 2024-02-05 00:44:33,394 (trainer:740) 
INFO: 27epoch:train:10301-10400batch: iter_time=8.378e-05, forward_time=0.290, loss_ctc=46.111, loss_att=42.057, acc=0.787, loss=43.273, backward_time=0.286, grad_norm=34.729, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=1.936e-04, train_time=1.281 +[gpub058:0/16] 2024-02-05 00:45:20,906 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-05 00:46:44,510 (trainer:740) INFO: 27epoch:train:10401-10500batch: iter_time=8.677e-05, forward_time=0.291, loss_ctc=44.292, loss_att=42.939, acc=0.769, loss=43.345, backward_time=0.286, grad_norm=39.870, clip=100.000, loss_scale=7.448e+33, optim_step_time=0.091, optim0_lr0=1.936e-04, train_time=1.311 +[gpub058:0/16] 2024-02-05 00:49:26,846 (trainer:740) INFO: 27epoch:train:10501-10600batch: iter_time=8.411e-05, forward_time=0.397, loss_ctc=41.178, loss_att=35.908, acc=0.768, loss=37.489, backward_time=0.360, grad_norm=32.385, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.935e-04, train_time=1.623 +[gpub058:0/16] 2024-02-05 00:51:46,027 (trainer:740) INFO: 27epoch:train:10601-10700batch: iter_time=8.952e-05, forward_time=0.305, loss_ctc=47.141, loss_att=39.839, acc=0.766, loss=42.030, backward_time=0.284, grad_norm=35.490, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.935e-04, train_time=1.391 +[gpub058:0/16] 2024-02-05 00:53:47,332 (trainer:740) INFO: 27epoch:train:10701-10800batch: iter_time=8.853e-05, forward_time=0.292, loss_ctc=48.819, loss_att=48.561, acc=0.755, loss=48.638, backward_time=0.288, grad_norm=38.158, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.935e-04, train_time=1.213 +[gpub058:0/16] 2024-02-05 00:56:29,992 (trainer:740) INFO: 27epoch:train:10801-10900batch: iter_time=7.331e-04, forward_time=0.364, loss_ctc=38.913, loss_att=41.935, acc=0.767, loss=41.028, backward_time=0.347, grad_norm=35.164, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, 
optim0_lr0=1.935e-04, train_time=1.626 +[gpub058:0/16] 2024-02-05 00:58:56,988 (trainer:740) INFO: 27epoch:train:10901-11000batch: iter_time=8.458e-05, forward_time=0.336, loss_ctc=48.673, loss_att=49.359, acc=0.734, loss=49.153, backward_time=0.306, grad_norm=41.505, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.934e-04, train_time=1.469 +[gpub058:0/16] 2024-02-05 01:00:53,915 (trainer:740) INFO: 27epoch:train:11001-11100batch: iter_time=8.804e-05, forward_time=0.290, loss_ctc=48.846, loss_att=43.889, acc=0.762, loss=45.376, backward_time=0.285, grad_norm=38.768, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.934e-04, train_time=1.169 +[gpub058:0/16] 2024-02-05 01:03:09,677 (trainer:740) INFO: 27epoch:train:11101-11200batch: iter_time=9.224e-05, forward_time=0.291, loss_ctc=47.734, loss_att=49.106, acc=0.750, loss=48.695, backward_time=0.285, grad_norm=35.353, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.934e-04, train_time=1.358 +[gpub058:0/16] 2024-02-05 01:05:03,074 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub058:0/16] 2024-02-05 01:05:21,784 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 01:05:25,587 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 01:05:25,587 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub058:0/16] 2024-02-05 01:05:25,590 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 01:12:14,706 (trainer:740) INFO: 27epoch:train:11201-11300batch: iter_time=3.762, forward_time=0.462, loss_ctc=48.872, loss_att=49.664, acc=0.728, loss=49.426, backward_time=0.346, grad_norm=41.745, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.934e-04, train_time=5.449 +[gpub058:0/16] 2024-02-05 01:14:29,160 (trainer:740) INFO: 27epoch:train:11301-11400batch: iter_time=7.760e-05, forward_time=0.290, loss_ctc=54.402, loss_att=60.486, acc=0.712, loss=58.661, backward_time=0.286, grad_norm=43.289, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.933e-04, train_time=1.345 +[gpub058:0/16] 2024-02-05 01:17:32,782 (trainer:740) INFO: 27epoch:train:11401-11500batch: iter_time=8.385e-05, forward_time=0.477, loss_ctc=48.669, loss_att=52.007, acc=0.763, loss=51.006, backward_time=0.329, grad_norm=37.424, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.933e-04, train_time=1.836 +[gpub058:0/16] 2024-02-05 01:20:21,038 (trainer:740) 
INFO: 27epoch:train:11501-11600batch: iter_time=8.640e-05, forward_time=0.287, loss_ctc=42.848, loss_att=41.313, acc=0.773, loss=41.773, backward_time=0.279, grad_norm=35.417, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.933e-04, train_time=1.682 +[gpub058:0/16] 2024-02-05 01:22:38,859 (trainer:740) INFO: 27epoch:train:11601-11700batch: iter_time=8.432e-05, forward_time=0.290, loss_ctc=47.473, loss_att=41.604, acc=0.769, loss=43.365, backward_time=0.284, grad_norm=37.456, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.933e-04, train_time=1.379 +[gpub058:0/16] 2024-02-05 01:25:30,504 (trainer:740) INFO: 27epoch:train:11701-11800batch: iter_time=3.973e-04, forward_time=0.416, loss_ctc=40.973, loss_att=37.969, acc=0.776, loss=38.870, backward_time=0.352, grad_norm=31.846, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.932e-04, train_time=1.716 +[gpub058:0/16] 2024-02-05 01:28:14,657 (trainer:740) INFO: 27epoch:train:11801-11900batch: iter_time=9.010e-05, forward_time=0.287, loss_ctc=43.989, loss_att=35.533, acc=0.778, loss=38.070, backward_time=0.281, grad_norm=30.872, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.932e-04, train_time=1.641 +[gpub058:0/16] 2024-02-05 01:30:27,167 (trainer:740) INFO: 27epoch:train:11901-12000batch: iter_time=9.269e-05, forward_time=0.288, loss_ctc=49.238, loss_att=47.884, acc=0.737, loss=48.290, backward_time=0.284, grad_norm=38.729, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.932e-04, train_time=1.325 +[gpub058:0/16] 2024-02-05 01:33:07,961 (trainer:740) INFO: 27epoch:train:12001-12100batch: iter_time=8.508e-05, forward_time=0.409, loss_ctc=45.750, loss_att=44.646, acc=0.756, loss=44.977, backward_time=0.353, grad_norm=38.152, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.932e-04, train_time=1.608 +[gpub058:0/16] 2024-02-05 01:35:58,089 (trainer:740) INFO: 
27epoch:train:12101-12200batch: iter_time=8.376e-05, forward_time=0.318, loss_ctc=44.445, loss_att=48.118, acc=0.744, loss=47.016, backward_time=0.287, grad_norm=36.351, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.931e-04, train_time=1.701 +[gpub058:0/16] 2024-02-05 01:38:10,422 (trainer:740) INFO: 27epoch:train:12201-12300batch: iter_time=8.794e-05, forward_time=0.288, loss_ctc=45.717, loss_att=40.647, acc=0.747, loss=42.168, backward_time=0.281, grad_norm=40.244, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.091, optim0_lr0=1.931e-04, train_time=1.323 +[gpub058:0/16] 2024-02-05 01:40:48,744 (trainer:740) INFO: 27epoch:train:12301-12400batch: iter_time=8.311e-05, forward_time=0.338, loss_ctc=47.730, loss_att=43.471, acc=0.748, loss=44.749, backward_time=0.332, grad_norm=39.359, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.931e-04, train_time=1.583 +[gpub058:0/16] 2024-02-05 01:43:39,997 (trainer:740) INFO: 27epoch:train:12401-12500batch: iter_time=0.002, forward_time=0.357, loss_ctc=49.958, loss_att=51.923, acc=0.730, loss=51.334, backward_time=0.308, grad_norm=39.487, clip=100.000, loss_scale=8.100e+33, optim_step_time=0.096, optim0_lr0=1.931e-04, train_time=1.712 +[gpub058:0/16] 2024-02-05 01:44:00,171 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub058:0/16] 2024-02-05 01:44:18,986 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 01:44:22,781 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 01:44:22,781 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub058:0/16] 2024-02-05 01:44:22,784 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 01:55:12,445 (trainer:740) INFO: 27epoch:train:12501-12600batch: iter_time=3.510, forward_time=0.307, loss_ctc=51.768, loss_att=53.649, acc=0.726, loss=53.085, backward_time=0.292, grad_norm=41.941, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.931e-04, train_time=6.925 +[gpub058:0/16] 2024-02-05 01:57:31,655 (trainer:740) INFO: 27epoch:train:12601-12700batch: iter_time=8.295e-05, forward_time=0.397, loss_ctc=50.334, loss_att=59.071, acc=0.733, loss=56.450, backward_time=0.341, grad_norm=39.903, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.930e-04, train_time=1.392 +[gpub058:0/16] 2024-02-05 02:00:15,262 (trainer:740) INFO: 27epoch:train:12701-12800batch: iter_time=8.369e-05, forward_time=0.323, loss_ctc=47.159, loss_att=43.460, acc=0.771, loss=44.570, backward_time=0.291, grad_norm=38.349, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.930e-04, train_time=1.635 +[gpub058:0/16] 2024-02-05 02:02:21,611 (trainer:740) 
INFO: 27epoch:train:12801-12900batch: iter_time=8.538e-05, forward_time=0.302, loss_ctc=45.907, loss_att=42.467, acc=0.782, loss=43.499, backward_time=0.286, grad_norm=34.341, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.930e-04, train_time=1.264 +[gpub058:0/16] 2024-02-05 02:04:25,653 (trainer:740) INFO: 27epoch:train:12901-13000batch: iter_time=7.895e-05, forward_time=0.392, loss_ctc=44.215, loss_att=41.657, acc=0.769, loss=42.424, backward_time=0.328, grad_norm=37.803, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.930e-04, train_time=1.240 +[gpub058:0/16] 2024-02-05 02:06:46,888 (trainer:740) INFO: 27epoch:train:13001-13100batch: iter_time=7.969e-05, forward_time=0.296, loss_ctc=41.140, loss_att=34.876, acc=0.772, loss=36.756, backward_time=0.302, grad_norm=32.083, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.929e-04, train_time=1.411 +[gpub058:0/16] 2024-02-05 02:09:17,045 (trainer:740) INFO: 27epoch:train:13101-13200batch: iter_time=8.035e-05, forward_time=0.289, loss_ctc=46.601, loss_att=38.729, acc=0.763, loss=41.091, backward_time=0.281, grad_norm=36.308, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=1.929e-04, train_time=1.499 +[gpub058:0/16] 2024-02-05 02:11:32,412 (trainer:740) INFO: 27epoch:train:13201-13300batch: iter_time=8.362e-05, forward_time=0.469, loss_ctc=48.557, loss_att=47.289, acc=0.753, loss=47.669, backward_time=0.344, grad_norm=37.195, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.929e-04, train_time=1.356 +[gpub058:0/16] 2024-02-05 02:13:25,219 (trainer:740) INFO: 27epoch:train:13301-13400batch: iter_time=9.006e-05, forward_time=0.299, loss_ctc=38.823, loss_att=41.773, acc=0.754, loss=40.888, backward_time=0.300, grad_norm=35.790, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.929e-04, train_time=1.128 +[gpub058:0/16] 2024-02-05 02:16:17,894 (trainer:740) INFO: 
27epoch:train:13401-13500batch: iter_time=7.983e-05, forward_time=0.289, loss_ctc=48.330, loss_att=48.982, acc=0.728, loss=48.786, backward_time=0.281, grad_norm=41.900, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.091, optim0_lr0=1.928e-04, train_time=1.726 +[gpub058:0/16] 2024-02-05 02:18:33,270 (trainer:740) INFO: 27epoch:train:13501-13600batch: iter_time=8.065e-05, forward_time=0.460, loss_ctc=48.611, loss_att=41.946, acc=0.760, loss=43.946, backward_time=0.350, grad_norm=36.325, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.928e-04, train_time=1.354 +[gpub058:0/16] 2024-02-05 02:20:24,458 (trainer:740) INFO: 27epoch:train:13601-13700batch: iter_time=8.822e-05, forward_time=0.302, loss_ctc=47.689, loss_att=49.728, acc=0.745, loss=49.117, backward_time=0.302, grad_norm=36.197, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.928e-04, train_time=1.112 +[gpub058:0/16] 2024-02-05 02:22:00,897 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub058:0/16] 2024-02-05 02:22:19,764 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 02:22:23,591 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 02:22:23,591 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub058:0/16] 2024-02-05 02:22:23,594 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 02:29:09,451 (trainer:740) INFO: 27epoch:train:13701-13800batch: iter_time=3.680, forward_time=0.389, loss_ctc=48.832, loss_att=51.047, acc=0.725, loss=50.382, backward_time=0.305, grad_norm=42.732, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.928e-04, train_time=5.249 +[gpub058:0/16] 2024-02-05 02:31:02,978 (trainer:740) INFO: 27epoch:train:13801-13900batch: iter_time=8.030e-05, forward_time=0.302, loss_ctc=54.604, loss_att=62.464, acc=0.716, loss=60.106, backward_time=0.293, grad_norm=47.160, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.927e-04, train_time=1.135 +[gpub058:0/16] 2024-02-05 02:34:21,989 (trainer:740) INFO: 27epoch:train:13901-14000batch: iter_time=7.990e-05, forward_time=0.422, loss_ctc=48.734, loss_att=52.008, acc=0.767, loss=51.025, backward_time=0.307, grad_norm=37.923, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.119, optim0_lr0=1.927e-04, train_time=1.990 +[gpub058:0/16] 2024-02-05 02:36:22,907 (trainer:740) 
INFO: 27epoch:train:14001-14100batch: iter_time=8.502e-05, forward_time=0.318, loss_ctc=43.020, loss_att=41.881, acc=0.773, loss=42.223, backward_time=0.295, grad_norm=34.679, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.927e-04, train_time=1.209 +[gpub058:0/16] 2024-02-05 02:38:58,638 (trainer:740) INFO: 27epoch:train:14101-14200batch: iter_time=8.526e-05, forward_time=0.291, loss_ctc=47.171, loss_att=41.659, acc=0.775, loss=43.312, backward_time=0.285, grad_norm=40.059, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.927e-04, train_time=1.558 +[gpub058:0/16] 2024-02-05 02:41:38,607 (trainer:740) INFO: 27epoch:train:14201-14300batch: iter_time=8.758e-05, forward_time=0.350, loss_ctc=40.630, loss_att=38.276, acc=0.775, loss=38.982, backward_time=0.357, grad_norm=32.236, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.926e-04, train_time=1.599 +[gpub058:0/16] 2024-02-05 02:43:56,568 (trainer:740) INFO: 27epoch:train:14301-14400batch: iter_time=8.391e-05, forward_time=0.289, loss_ctc=43.977, loss_att=35.825, acc=0.782, loss=38.270, backward_time=0.298, grad_norm=31.336, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.926e-04, train_time=1.379 +[gpub058:0/16] 2024-02-05 02:45:42,971 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-05 02:46:22,556 (trainer:740) INFO: 27epoch:train:14401-14500batch: iter_time=9.055e-05, forward_time=0.294, loss_ctc=48.676, loss_att=47.958, acc=0.748, loss=48.174, backward_time=0.286, grad_norm=38.818, clip=100.000, loss_scale=1.385e+34, optim_step_time=0.091, optim0_lr0=1.926e-04, train_time=1.459 +[gpub058:0/16] 2024-02-05 02:46:41,578 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-05 02:48:59,488 (trainer:740) INFO: 27epoch:train:14501-14600batch: iter_time=8.555e-05, forward_time=0.369, loss_ctc=45.756, loss_att=45.696, acc=0.762, loss=45.714, backward_time=0.369, grad_norm=39.801, clip=100.000, loss_scale=5.927e+33, optim_step_time=0.097, optim0_lr0=1.926e-04, train_time=1.569 +[gpub058:0/16] 2024-02-05 02:50:57,545 (trainer:740) INFO: 27epoch:train:14601-14700batch: iter_time=8.239e-05, forward_time=0.296, loss_ctc=44.109, loss_att=49.235, acc=0.750, loss=47.697, backward_time=0.287, grad_norm=35.820, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.926e-04, train_time=1.180 +[gpub058:0/16] 2024-02-05 02:53:38,038 (trainer:740) INFO: 27epoch:train:14701-14800batch: iter_time=8.603e-05, forward_time=0.290, loss_ctc=46.175, loss_att=43.075, acc=0.744, loss=44.005, backward_time=0.290, grad_norm=43.400, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.925e-04, train_time=1.605 +[gpub058:0/16] 2024-02-05 02:55:50,663 (trainer:740) INFO: 27epoch:train:14801-14900batch: iter_time=8.611e-05, forward_time=0.387, loss_ctc=47.216, loss_att=43.422, acc=0.758, loss=44.561, backward_time=0.301, grad_norm=38.770, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.925e-04, train_time=1.326 +[gpub058:0/16] 2024-02-05 02:58:05,930 (trainer:740) INFO: 27epoch:train:14901-15000batch: iter_time=5.440e-04, forward_time=0.310, loss_ctc=50.156, loss_att=52.655, acc=0.734, loss=51.905, backward_time=0.302, grad_norm=43.164, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.925e-04, train_time=1.350 +[gpub058:0/16] 2024-02-05 03:36:54,782 (trainer:346) INFO: 27epoch results: [train] iter_time=0.275, forward_time=0.342, loss_ctc=47.375, loss_att=46.224, acc=0.752, loss=46.569, backward_time=0.308, grad_norm=38.151, clip=100.000, loss_scale=6.401e+33, optim_step_time=0.096, optim0_lr0=1.943e-04, train_time=2.226, time=9 hours, 16 minutes and 55.03 
seconds, total_count=435000, gpu_max_cached_mem_GB=41.105, [valid] loss_ctc=38.776, cer_ctc=0.195, loss_att=42.847, acc=0.662, cer=0.333, wer=0.997, loss=41.625, time=38 minutes and 24.8 seconds, total_count=135459, gpu_max_cached_mem_GB=41.105 +[gpub058:0/16] 2024-02-05 03:37:16,882 (trainer:394) INFO: The best model has been updated: valid.total_count +[gpub058:0/16] 2024-02-05 03:37:16,986 (trainer:275) INFO: 28/45epoch started. Estimated time to finish: 1 week, 10 hours and 42 minutes +[gpub058:0/16] 2024-02-05 03:37:16,998 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub058:0/16] 2024-02-05 03:37:34,627 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 03:37:37,965 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 03:37:37,965 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub058:0/16] 2024-02-05 03:37:37,969 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 03:44:54,039 (trainer:740) INFO: 28epoch:train:1-100batch: iter_time=3.252, forward_time=0.391, loss_ctc=47.164, loss_att=53.798, acc=0.732, loss=51.808, backward_time=0.297, grad_norm=43.256, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.925e-04, train_time=4.570 +[gpub058:0/16] 2024-02-05 03:47:07,409 (trainer:740) INFO: 28epoch:train:101-200batch: iter_time=8.986e-05, 
forward_time=0.293, loss_ctc=51.828, loss_att=53.255, acc=0.729, loss=52.827, backward_time=0.288, grad_norm=44.816, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.924e-04, train_time=1.333 +[gpub058:0/16] 2024-02-05 03:49:51,260 (trainer:740) INFO: 28epoch:train:201-300batch: iter_time=8.328e-05, forward_time=0.384, loss_ctc=60.384, loss_att=60.863, acc=0.732, loss=60.719, backward_time=0.328, grad_norm=56.044, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.924e-04, train_time=1.639 +[gpub058:0/16] 2024-02-05 03:52:16,205 (trainer:740) INFO: 28epoch:train:301-400batch: iter_time=8.430e-05, forward_time=0.291, loss_ctc=56.676, loss_att=51.297, acc=0.737, loss=52.911, backward_time=0.285, grad_norm=51.799, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.924e-04, train_time=1.449 +[gpub058:0/16] 2024-02-05 03:54:27,121 (trainer:740) INFO: 28epoch:train:401-500batch: iter_time=9.103e-05, forward_time=0.352, loss_ctc=52.934, loss_att=50.603, acc=0.737, loss=51.303, backward_time=0.312, grad_norm=39.038, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.924e-04, train_time=1.309 +[gpub058:0/16] 2024-02-05 03:56:56,645 (trainer:740) INFO: 28epoch:train:501-600batch: iter_time=9.576e-05, forward_time=0.322, loss_ctc=40.904, loss_att=40.145, acc=0.752, loss=40.373, backward_time=0.307, grad_norm=37.841, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.923e-04, train_time=1.495 +[gpub058:0/16] 2024-02-05 03:59:00,386 (trainer:740) INFO: 28epoch:train:601-700batch: iter_time=9.076e-05, forward_time=0.305, loss_ctc=48.447, loss_att=46.923, acc=0.725, loss=47.380, backward_time=0.288, grad_norm=47.289, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.923e-04, train_time=1.237 +[gpub058:0/16] 2024-02-05 04:01:16,159 (trainer:740) INFO: 28epoch:train:701-800batch: iter_time=9.111e-05, forward_time=0.382, loss_ctc=49.188, loss_att=42.753, 
acc=0.751, loss=44.683, backward_time=0.340, grad_norm=38.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.923e-04, train_time=1.357 +[gpub058:0/16] 2024-02-05 04:03:53,455 (trainer:740) INFO: 28epoch:train:801-900batch: iter_time=8.597e-05, forward_time=0.290, loss_ctc=54.711, loss_att=43.822, acc=0.746, loss=47.089, backward_time=0.284, grad_norm=43.528, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.923e-04, train_time=1.573 +[gpub058:0/16] 2024-02-05 04:06:02,855 (trainer:740) INFO: 28epoch:train:901-1000batch: iter_time=8.997e-05, forward_time=0.312, loss_ctc=48.201, loss_att=50.356, acc=0.726, loss=49.709, backward_time=0.307, grad_norm=40.562, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.922e-04, train_time=1.294 +[gpub058:0/16] 2024-02-05 04:08:32,222 (trainer:740) INFO: 28epoch:train:1001-1100batch: iter_time=8.517e-05, forward_time=0.389, loss_ctc=48.925, loss_att=50.682, acc=0.727, loss=50.155, backward_time=0.343, grad_norm=38.186, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.922e-04, train_time=1.493 +[gpub058:0/16] 2024-02-05 04:10:36,069 (trainer:740) INFO: 28epoch:train:1101-1200batch: iter_time=8.893e-05, forward_time=0.292, loss_ctc=54.764, loss_att=51.018, acc=0.731, loss=52.142, backward_time=0.287, grad_norm=65.466, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.922e-04, train_time=1.238 +[gpub058:0/16] 2024-02-05 04:12:08,475 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub058:0/16] 2024-02-05 04:12:27,189 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 04:12:30,724 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 04:12:30,724 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub058:0/16] 2024-02-05 04:12:30,729 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 04:19:07,063 (trainer:740) INFO: 28epoch:train:1201-1300batch: iter_time=3.656, forward_time=0.392, loss_ctc=46.142, loss_att=49.986, acc=0.743, loss=48.832, backward_time=0.314, grad_norm=36.486, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.922e-04, train_time=5.110 +[gpub058:0/16] 2024-02-05 04:20:56,746 (trainer:740) INFO: 28epoch:train:1301-1400batch: iter_time=8.567e-05, forward_time=0.293, loss_ctc=49.340, loss_att=55.265, acc=0.738, loss=53.488, backward_time=0.289, grad_norm=43.032, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.921e-04, train_time=1.097 +[gpub058:0/16] 2024-02-05 04:23:14,699 (trainer:740) INFO: 28epoch:train:1401-1500batch: iter_time=8.779e-05, forward_time=0.297, loss_ctc=59.807, loss_att=59.774, acc=0.738, loss=59.784, backward_time=0.300, grad_norm=57.817, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.921e-04, train_time=1.379 +[gpub058:0/16] 2024-02-05 04:25:51,093 (trainer:740) 
INFO: 28epoch:train:1501-1600batch: iter_time=9.659e-05, forward_time=0.346, loss_ctc=51.418, loss_att=48.096, acc=0.757, loss=49.093, backward_time=0.347, grad_norm=45.055, clip=100.000, loss_scale=9.606e+33, optim_step_time=0.097, optim0_lr0=1.921e-04, train_time=1.563 +[gpub058:0/16] 2024-02-05 04:28:07,752 (trainer:740) INFO: 28epoch:train:1601-1700batch: iter_time=9.652e-05, forward_time=0.338, loss_ctc=52.249, loss_att=50.549, acc=0.754, loss=51.059, backward_time=0.290, grad_norm=41.293, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.921e-04, train_time=1.367 +[gpub058:0/16] 2024-02-05 04:30:03,870 (trainer:740) INFO: 28epoch:train:1701-1800batch: iter_time=9.362e-05, forward_time=0.311, loss_ctc=48.808, loss_att=52.450, acc=0.736, loss=51.357, backward_time=0.304, grad_norm=39.442, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.921e-04, train_time=1.161 +[gpub058:0/16] 2024-02-05 04:32:48,535 (trainer:740) INFO: 28epoch:train:1801-1900batch: iter_time=9.390e-05, forward_time=0.334, loss_ctc=46.185, loss_att=42.859, acc=0.756, loss=43.857, backward_time=0.338, grad_norm=45.651, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.920e-04, train_time=1.646 +[gpub058:0/16] 2024-02-05 04:34:45,257 (trainer:740) INFO: 28epoch:train:1901-2000batch: iter_time=9.427e-05, forward_time=0.288, loss_ctc=43.458, loss_att=37.455, acc=0.757, loss=39.256, backward_time=0.284, grad_norm=36.243, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.920e-04, train_time=1.167 +[gpub058:0/16] 2024-02-05 04:37:14,859 (trainer:740) INFO: 28epoch:train:2001-2100batch: iter_time=9.007e-05, forward_time=0.307, loss_ctc=46.147, loss_att=41.202, acc=0.756, loss=42.685, backward_time=0.288, grad_norm=37.495, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.920e-04, train_time=1.496 +[gpub058:0/16] 2024-02-05 04:39:50,161 (trainer:740) INFO: 28epoch:train:2101-2200batch: 
iter_time=8.856e-05, forward_time=0.406, loss_ctc=52.034, loss_att=46.519, acc=0.745, loss=48.174, backward_time=0.307, grad_norm=43.221, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.920e-04, train_time=1.553 +[gpub058:0/16] 2024-02-05 04:42:06,305 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-05 04:42:15,221 (trainer:740) INFO: 28epoch:train:2201-2300batch: iter_time=8.974e-05, forward_time=0.291, loss_ctc=48.591, loss_att=51.806, acc=0.745, loss=50.841, backward_time=0.286, grad_norm=38.122, clip=100.000, loss_scale=9.965e+33, optim_step_time=0.092, optim0_lr0=1.919e-04, train_time=1.450 +[gpub058:0/16] 2024-02-05 04:44:14,914 (trainer:740) INFO: 28epoch:train:2301-2400batch: iter_time=8.515e-05, forward_time=0.305, loss_ctc=53.417, loss_att=54.251, acc=0.725, loss=54.001, backward_time=0.304, grad_norm=40.608, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.919e-04, train_time=1.197 +[gpub058:0/16] 2024-02-05 04:46:29,200 (trainer:740) INFO: 28epoch:train:2401-2500batch: iter_time=8.144e-05, forward_time=0.297, loss_ctc=49.999, loss_att=50.562, acc=0.754, loss=50.393, backward_time=0.286, grad_norm=38.747, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.919e-04, train_time=1.343 +[gpub058:0/16] 2024-02-05 04:46:49,228 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub058:0/16] 2024-02-05 04:47:08,047 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 04:47:11,526 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 04:47:11,526 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub058:0/16] 2024-02-05 04:47:11,549 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 04:54:20,670 (trainer:740) INFO: 28epoch:train:2501-2600batch: iter_time=3.490, forward_time=0.388, loss_ctc=45.988, loss_att=52.563, acc=0.739, loss=50.591, backward_time=0.296, grad_norm=41.545, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.919e-04, train_time=4.714 +[gpub058:0/16] 2024-02-05 04:56:40,198 (trainer:740) INFO: 28epoch:train:2601-2700batch: iter_time=8.131e-05, forward_time=0.310, loss_ctc=49.830, loss_att=50.833, acc=0.736, loss=50.532, backward_time=0.286, grad_norm=44.111, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.918e-04, train_time=1.395 +[gpub058:0/16] 2024-02-05 04:59:15,855 (trainer:740) INFO: 28epoch:train:2701-2800batch: iter_time=8.532e-05, forward_time=0.369, loss_ctc=56.456, loss_att=58.841, acc=0.736, loss=58.126, backward_time=0.353, grad_norm=52.460, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.918e-04, train_time=1.557 +[gpub058:0/16] 2024-02-05 05:01:40,556 (trainer:740) INFO: 
28epoch:train:2801-2900batch: iter_time=8.155e-05, forward_time=0.293, loss_ctc=53.205, loss_att=49.202, acc=0.743, loss=50.403, backward_time=0.293, grad_norm=48.258, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.918e-04, train_time=1.447 +[gpub058:0/16] 2024-02-05 05:03:42,893 (trainer:740) INFO: 28epoch:train:2901-3000batch: iter_time=8.315e-05, forward_time=0.292, loss_ctc=51.925, loss_att=48.940, acc=0.743, loss=49.835, backward_time=0.289, grad_norm=38.997, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.918e-04, train_time=1.223 +[gpub058:0/16] 2024-02-05 05:06:02,847 (trainer:740) INFO: 28epoch:train:3001-3100batch: iter_time=8.096e-05, forward_time=0.315, loss_ctc=40.035, loss_att=39.314, acc=0.757, loss=39.530, backward_time=0.283, grad_norm=37.746, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.917e-04, train_time=1.399 +[gpub058:0/16] 2024-02-05 05:08:48,366 (trainer:740) INFO: 28epoch:train:3101-3200batch: iter_time=9.025e-05, forward_time=0.400, loss_ctc=47.482, loss_att=46.486, acc=0.728, loss=46.785, backward_time=0.311, grad_norm=43.571, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.917e-04, train_time=1.655 +[gpub058:0/16] 2024-02-05 05:10:38,288 (trainer:740) INFO: 28epoch:train:3201-3300batch: iter_time=9.027e-05, forward_time=0.291, loss_ctc=47.972, loss_att=42.118, acc=0.753, loss=43.874, backward_time=0.284, grad_norm=37.896, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.917e-04, train_time=1.099 +[gpub058:0/16] 2024-02-05 05:12:59,374 (trainer:740) INFO: 28epoch:train:3301-3400batch: iter_time=8.831e-05, forward_time=0.290, loss_ctc=52.295, loss_att=43.193, acc=0.748, loss=45.923, backward_time=0.286, grad_norm=40.527, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.917e-04, train_time=1.411 +[gpub058:0/16] 2024-02-05 05:15:13,805 (trainer:740) INFO: 28epoch:train:3401-3500batch: 
iter_time=8.399e-05, forward_time=0.295, loss_ctc=46.888, loss_att=50.126, acc=0.731, loss=49.155, backward_time=0.294, grad_norm=38.397, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.917e-04, train_time=1.343 +[gpub058:0/16] 2024-02-05 05:17:30,461 (trainer:740) INFO: 28epoch:train:3501-3600batch: iter_time=8.445e-05, forward_time=0.357, loss_ctc=47.716, loss_att=49.715, acc=0.731, loss=49.115, backward_time=0.314, grad_norm=37.524, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.916e-04, train_time=1.367 +[gpub058:0/16] 2024-02-05 05:20:01,200 (trainer:740) INFO: 28epoch:train:3601-3700batch: iter_time=8.379e-05, forward_time=0.292, loss_ctc=54.829, loss_att=50.961, acc=0.732, loss=52.121, backward_time=0.286, grad_norm=39.795, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.916e-04, train_time=1.507 +[gpub058:0/16] 2024-02-05 05:21:15,623 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub058:0/16] 2024-02-05 05:21:34,904 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 05:21:38,818 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 05:21:38,819 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-05 05:21:38,822 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 05:27:38,488 
(trainer:740) INFO: 28epoch:train:3701-3800batch: iter_time=3.447, forward_time=0.298, loss_ctc=45.511, loss_att=49.346, acc=0.743, loss=48.195, backward_time=0.286, grad_norm=36.823, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.916e-04, train_time=4.572 +[gpub058:0/16] 2024-02-05 05:30:13,138 (trainer:740) INFO: 28epoch:train:3801-3900batch: iter_time=8.454e-05, forward_time=0.341, loss_ctc=48.676, loss_att=53.234, acc=0.731, loss=51.867, backward_time=0.313, grad_norm=43.321, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.916e-04, train_time=1.547 +[gpub058:0/16] 2024-02-05 05:32:18,857 (trainer:740) INFO: 28epoch:train:3901-4000batch: iter_time=8.267e-05, forward_time=0.290, loss_ctc=58.231, loss_att=58.463, acc=0.738, loss=58.394, backward_time=0.286, grad_norm=58.213, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.915e-04, train_time=1.257 +[gpub058:0/16] 2024-02-05 05:34:21,425 (trainer:740) INFO: 28epoch:train:4001-4100batch: iter_time=8.473e-05, forward_time=0.301, loss_ctc=49.926, loss_att=46.608, acc=0.751, loss=47.603, backward_time=0.292, grad_norm=44.349, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.915e-04, train_time=1.225 +[gpub058:0/16] 2024-02-05 05:37:23,687 (trainer:740) INFO: 28epoch:train:4101-4200batch: iter_time=8.532e-05, forward_time=0.343, loss_ctc=51.708, loss_att=50.265, acc=0.746, loss=50.698, backward_time=0.368, grad_norm=42.448, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.915e-04, train_time=1.823 +[gpub058:0/16] 2024-02-05 05:39:16,734 (trainer:740) INFO: 28epoch:train:4201-4300batch: iter_time=8.519e-05, forward_time=0.290, loss_ctc=48.300, loss_att=50.500, acc=0.732, loss=49.840, backward_time=0.284, grad_norm=40.086, clip=100.000, loss_scale=5.608e+33, optim_step_time=0.093, optim0_lr0=1.915e-04, train_time=1.130 +[gpub058:0/16] 2024-02-05 05:41:00,973 (trainer:671) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub058:0/16] 2024-02-05 05:41:25,238 (trainer:740) INFO: 28epoch:train:4301-4400batch: iter_time=8.409e-05, forward_time=0.307, loss_ctc=45.472, loss_att=43.025, acc=0.744, loss=43.760, backward_time=0.288, grad_norm=43.977, clip=100.000, loss_scale=9.283e+33, optim_step_time=0.094, optim0_lr0=1.914e-04, train_time=1.285 +[gpub058:0/16] 2024-02-05 05:43:56,411 (trainer:740) INFO: 28epoch:train:4401-4500batch: iter_time=8.743e-05, forward_time=0.375, loss_ctc=42.943, loss_att=36.808, acc=0.760, loss=38.648, backward_time=0.305, grad_norm=37.688, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.914e-04, train_time=1.512 +[gpub058:0/16] 2024-02-05 05:46:06,975 (trainer:740) INFO: 28epoch:train:4501-4600batch: iter_time=8.468e-05, forward_time=0.289, loss_ctc=45.627, loss_att=40.642, acc=0.752, loss=42.138, backward_time=0.287, grad_norm=36.878, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.914e-04, train_time=1.305 +[gpub058:0/16] 2024-02-05 05:48:26,014 (trainer:740) INFO: 28epoch:train:4601-4700batch: iter_time=6.109e-04, forward_time=0.302, loss_ctc=51.648, loss_att=45.380, acc=0.740, loss=47.260, backward_time=0.308, grad_norm=44.114, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.914e-04, train_time=1.390 +[gpub058:0/16] 2024-02-05 05:50:49,638 (trainer:740) INFO: 28epoch:train:4701-4800batch: iter_time=8.949e-05, forward_time=0.340, loss_ctc=47.812, loss_att=49.786, acc=0.744, loss=49.194, backward_time=0.338, grad_norm=36.191, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.914e-04, train_time=1.436 +[gpub058:0/16] 2024-02-05 05:53:10,555 (trainer:740) INFO: 28epoch:train:4801-4900batch: iter_time=8.406e-05, forward_time=0.330, loss_ctc=53.120, loss_att=52.803, acc=0.717, loss=52.898, backward_time=0.289, grad_norm=42.554, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.913e-04, train_time=1.409 
+[gpub058:0/16] 2024-02-05 05:55:26,668 (trainer:740) INFO: 28epoch:train:4901-5000batch: iter_time=8.571e-05, forward_time=0.290, loss_ctc=49.974, loss_att=50.219, acc=0.750, loss=50.145, backward_time=0.286, grad_norm=39.000, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.913e-04, train_time=1.361 +[gpub058:0/16] 2024-02-05 05:55:46,697 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub058:0/16] 2024-02-05 05:56:05,609 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 05:56:09,161 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 05:56:09,161 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub058:0/16] 2024-02-05 05:56:09,166 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 06:03:16,211 (trainer:740) INFO: 28epoch:train:5001-5100batch: iter_time=3.258, forward_time=0.390, loss_ctc=45.446, loss_att=51.677, acc=0.743, loss=49.808, backward_time=0.310, grad_norm=37.981, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.913e-04, train_time=4.695 +[gpub058:0/16] 2024-02-05 06:03:32,786 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-05 06:05:45,543 (trainer:740) INFO: 28epoch:train:5101-5200batch: iter_time=8.543e-05, forward_time=0.307, loss_ctc=49.566, loss_att=51.050, acc=0.737, loss=50.604, backward_time=0.286, grad_norm=42.905, clip=100.000, loss_scale=2.885e+33, optim_step_time=0.093, optim0_lr0=1.913e-04, train_time=1.494 +[gpub058:0/16] 2024-02-05 06:07:55,639 (trainer:740) INFO: 28epoch:train:5201-5300batch: iter_time=9.053e-05, forward_time=0.292, loss_ctc=54.753, loss_att=57.315, acc=0.739, loss=56.546, backward_time=0.289, grad_norm=52.802, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.912e-04, train_time=1.301 +[gpub058:0/16] 2024-02-05 06:10:21,435 (trainer:740) INFO: 28epoch:train:5301-5400batch: iter_time=8.988e-05, forward_time=0.388, loss_ctc=52.811, loss_att=48.964, acc=0.747, loss=50.118, backward_time=0.323, grad_norm=50.837, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.912e-04, train_time=1.458 +[gpub058:0/16] 2024-02-05 06:12:28,216 (trainer:740) INFO: 28epoch:train:5401-5500batch: iter_time=8.541e-05, forward_time=0.300, loss_ctc=50.971, loss_att=48.727, acc=0.745, loss=49.400, backward_time=0.291, grad_norm=39.234, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.912e-04, train_time=1.267 +[gpub058:0/16] 2024-02-05 06:14:35,221 (trainer:740) INFO: 28epoch:train:5501-5600batch: iter_time=9.116e-05, forward_time=0.288, loss_ctc=39.890, loss_att=38.373, acc=0.761, loss=38.828, backward_time=0.282, grad_norm=36.788, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.912e-04, train_time=1.271 +[gpub058:0/16] 2024-02-05 06:17:17,801 (trainer:740) INFO: 28epoch:train:5601-5700batch: iter_time=9.191e-05, forward_time=0.356, loss_ctc=46.714, loss_att=45.533, acc=0.731, loss=45.887, backward_time=0.340, grad_norm=43.297, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.911e-04, train_time=1.626 +[gpub058:0/16] 2024-02-05 06:19:33,297 
(trainer:740) INFO: 28epoch:train:5701-5800batch: iter_time=9.497e-05, forward_time=0.299, loss_ctc=47.625, loss_att=41.302, acc=0.756, loss=43.199, backward_time=0.285, grad_norm=37.278, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.911e-04, train_time=1.355 +[gpub058:0/16] 2024-02-05 06:21:31,488 (trainer:740) INFO: 28epoch:train:5801-5900batch: iter_time=9.603e-05, forward_time=0.300, loss_ctc=52.013, loss_att=42.563, acc=0.754, loss=45.398, backward_time=0.292, grad_norm=41.874, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.911e-04, train_time=1.181 +[gpub058:0/16] 2024-02-05 06:24:11,817 (trainer:740) INFO: 28epoch:train:5901-6000batch: iter_time=8.729e-05, forward_time=0.292, loss_ctc=46.652, loss_att=49.125, acc=0.732, loss=48.383, backward_time=0.284, grad_norm=37.831, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.911e-04, train_time=1.604 +[gpub058:0/16] 2024-02-05 06:26:33,912 (trainer:740) INFO: 28epoch:train:6001-6100batch: iter_time=9.435e-05, forward_time=0.430, loss_ctc=47.466, loss_att=49.477, acc=0.734, loss=48.874, backward_time=0.342, grad_norm=39.430, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.910e-04, train_time=1.421 +[gpub058:0/16] 2024-02-05 06:29:11,659 (trainer:740) INFO: 28epoch:train:6101-6200batch: iter_time=9.351e-05, forward_time=0.300, loss_ctc=53.904, loss_att=50.497, acc=0.735, loss=51.519, backward_time=0.294, grad_norm=39.475, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.910e-04, train_time=1.577 +[gpub058:0/16] 2024-02-05 06:30:26,469 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub058:0/16] 2024-02-05 06:30:45,572 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 06:30:49,145 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 06:30:49,145 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub058:0/16] 2024-02-05 06:30:49,148 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 06:37:07,188 (trainer:740) INFO: 28epoch:train:6201-6300batch: iter_time=3.537, forward_time=0.382, loss_ctc=44.676, loss_att=48.449, acc=0.746, loss=47.317, backward_time=0.303, grad_norm=35.784, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.910e-04, train_time=4.755 +[gpub058:0/16] 2024-02-05 06:39:48,581 (trainer:740) INFO: 28epoch:train:6301-6400batch: iter_time=8.426e-05, forward_time=0.290, loss_ctc=48.180, loss_att=52.861, acc=0.732, loss=51.457, backward_time=0.285, grad_norm=42.074, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.910e-04, train_time=1.614 +[gpub058:0/16] 2024-02-05 06:41:56,163 (trainer:740) INFO: 28epoch:train:6401-6500batch: iter_time=8.094e-05, forward_time=0.309, loss_ctc=56.192, loss_att=58.194, acc=0.739, loss=57.594, backward_time=0.297, grad_norm=59.012, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.910e-04, train_time=1.275 +[gpub058:0/16] 2024-02-05 06:44:43,999 (trainer:740) 
INFO: 28epoch:train:6501-6600batch: iter_time=9.073e-05, forward_time=0.348, loss_ctc=49.900, loss_att=45.917, acc=0.754, loss=47.112, backward_time=0.415, grad_norm=42.993, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.106, optim0_lr0=1.909e-04, train_time=1.678 +[gpub058:0/16] 2024-02-05 06:46:45,237 (trainer:740) INFO: 28epoch:train:6601-6700batch: iter_time=8.862e-05, forward_time=0.292, loss_ctc=51.305, loss_att=50.046, acc=0.750, loss=50.424, backward_time=0.287, grad_norm=40.067, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.909e-04, train_time=1.212 +[gpub058:0/16] 2024-02-05 06:49:03,678 (trainer:740) INFO: 28epoch:train:6701-6800batch: iter_time=8.921e-05, forward_time=0.310, loss_ctc=47.321, loss_att=49.619, acc=0.736, loss=48.929, backward_time=0.286, grad_norm=39.235, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.092, optim0_lr0=1.909e-04, train_time=1.384 +[gpub058:0/16] 2024-02-05 06:51:14,645 (trainer:740) INFO: 28epoch:train:6801-6900batch: iter_time=8.957e-05, forward_time=0.318, loss_ctc=45.069, loss_att=42.913, acc=0.745, loss=43.560, backward_time=0.346, grad_norm=40.608, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.909e-04, train_time=1.309 +[gpub058:0/16] 2024-02-05 06:53:42,705 (trainer:740) INFO: 28epoch:train:6901-7000batch: iter_time=8.541e-05, forward_time=0.306, loss_ctc=42.641, loss_att=36.931, acc=0.759, loss=38.644, backward_time=0.292, grad_norm=37.864, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.908e-04, train_time=1.480 +[gpub058:0/16] 2024-02-05 06:55:44,531 (trainer:740) INFO: 28epoch:train:7001-7100batch: iter_time=8.100e-05, forward_time=0.289, loss_ctc=45.355, loss_att=40.469, acc=0.754, loss=41.934, backward_time=0.284, grad_norm=36.625, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.908e-04, train_time=1.218 +[gpub058:0/16] 2024-02-05 06:57:47,597 (trainer:740) INFO: 28epoch:train:7101-7200batch: 
iter_time=8.675e-05, forward_time=0.299, loss_ctc=50.998, loss_att=45.037, acc=0.742, loss=46.825, backward_time=0.292, grad_norm=46.151, clip=100.000, loss_scale=4.881e+33, optim_step_time=0.093, optim0_lr0=1.908e-04, train_time=1.230 +[gpub058:0/16] 2024-02-05 07:00:43,539 (trainer:740) INFO: 28epoch:train:7201-7300batch: iter_time=8.145e-05, forward_time=0.419, loss_ctc=47.913, loss_att=50.057, acc=0.744, loss=49.414, backward_time=0.304, grad_norm=36.612, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.109, optim0_lr0=1.908e-04, train_time=1.759 +[gpub058:0/16] 2024-02-05 07:02:40,670 (trainer:740) INFO: 28epoch:train:7301-7400batch: iter_time=8.249e-05, forward_time=0.291, loss_ctc=52.539, loss_att=51.887, acc=0.719, loss=52.083, backward_time=0.285, grad_norm=41.318, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.907e-04, train_time=1.171 +[gpub058:0/16] 2024-02-05 07:04:57,416 (trainer:740) INFO: 28epoch:train:7401-7500batch: iter_time=8.193e-05, forward_time=0.304, loss_ctc=49.739, loss_att=49.983, acc=0.751, loss=49.910, backward_time=0.288, grad_norm=37.622, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.907e-04, train_time=1.367 +[gpub058:0/16] 2024-02-05 07:05:17,445 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub058:0/16] 2024-02-05 07:05:36,577 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 07:05:40,209 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 07:05:40,210 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-05 07:05:40,213 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 07:13:11,531 (trainer:740) INFO: 28epoch:train:7501-7600batch: iter_time=3.467, forward_time=0.361, loss_ctc=44.744, loss_att=50.614, acc=0.745, loss=48.853, backward_time=0.297, grad_norm=37.346, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.907e-04, train_time=4.941 +[gpub058:0/16] 2024-02-05 07:15:44,758 (trainer:740) INFO: 28epoch:train:7601-7700batch: iter_time=8.157e-05, forward_time=0.289, loss_ctc=49.116, loss_att=50.184, acc=0.739, loss=49.863, backward_time=0.284, grad_norm=41.809, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.907e-04, train_time=1.532 +[gpub058:0/16] 2024-02-05 07:19:25,512 (trainer:740) INFO: 28epoch:train:7701-7800batch: iter_time=2.591e-04, forward_time=0.366, loss_ctc=54.096, loss_att=57.609, acc=0.741, loss=56.555, backward_time=0.407, grad_norm=52.450, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=1.907e-04, train_time=2.207 +[gpub058:0/16] 2024-02-05 07:22:11,246 (trainer:740) INFO: 
28epoch:train:7801-7900batch: iter_time=7.880e-05, forward_time=0.291, loss_ctc=52.212, loss_att=48.234, acc=0.747, loss=49.427, backward_time=0.283, grad_norm=45.086, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.906e-04, train_time=1.657 +[gpub058:0/16] 2024-02-05 07:24:50,809 (trainer:740) INFO: 28epoch:train:7901-8000batch: iter_time=8.014e-05, forward_time=0.290, loss_ctc=50.654, loss_att=48.203, acc=0.747, loss=48.939, backward_time=0.285, grad_norm=41.554, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.906e-04, train_time=1.595 +[gpub058:0/16] 2024-02-05 07:27:35,334 (trainer:740) INFO: 28epoch:train:8001-8100batch: iter_time=8.073e-05, forward_time=0.286, loss_ctc=39.493, loss_att=38.597, acc=0.761, loss=38.866, backward_time=0.279, grad_norm=35.655, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.906e-04, train_time=1.645 +[gpub058:0/16] 2024-02-05 07:30:43,260 (trainer:740) INFO: 28epoch:train:8101-8200batch: iter_time=8.696e-05, forward_time=0.337, loss_ctc=46.283, loss_att=45.115, acc=0.732, loss=45.466, backward_time=0.372, grad_norm=44.044, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.906e-04, train_time=1.879 +[gpub058:0/16] 2024-02-05 07:33:20,122 (trainer:740) INFO: 28epoch:train:8201-8300batch: iter_time=9.278e-05, forward_time=0.291, loss_ctc=47.412, loss_att=41.420, acc=0.756, loss=43.217, backward_time=0.284, grad_norm=40.078, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.905e-04, train_time=1.569 +[gpub058:0/16] 2024-02-05 07:36:10,284 (trainer:740) INFO: 28epoch:train:8301-8400batch: iter_time=9.274e-05, forward_time=0.288, loss_ctc=51.432, loss_att=42.466, acc=0.753, loss=45.156, backward_time=0.283, grad_norm=40.208, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.905e-04, train_time=1.701 +[gpub058:0/16] 2024-02-05 07:38:43,511 (trainer:740) INFO: 28epoch:train:8401-8500batch: 
iter_time=8.881e-05, forward_time=0.348, loss_ctc=46.819, loss_att=49.252, acc=0.734, loss=48.522, backward_time=0.357, grad_norm=40.789, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.905e-04, train_time=1.532 +[gpub058:0/16] 2024-02-05 07:41:22,711 (trainer:740) INFO: 28epoch:train:8501-8600batch: iter_time=9.447e-05, forward_time=0.289, loss_ctc=47.061, loss_att=49.249, acc=0.736, loss=48.592, backward_time=0.284, grad_norm=37.472, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.905e-04, train_time=1.591 +[gpub058:0/16] 2024-02-05 07:43:56,527 (trainer:740) INFO: 28epoch:train:8601-8700batch: iter_time=8.552e-05, forward_time=0.292, loss_ctc=53.502, loss_att=50.019, acc=0.737, loss=51.064, backward_time=0.286, grad_norm=39.808, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.904e-04, train_time=1.539 +[gpub058:0/16] 2024-02-05 07:45:19,008 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub058:0/16] 2024-02-05 07:45:37,986 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 07:45:41,578 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 07:45:41,578 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub058:0/16] 2024-02-05 07:45:41,581 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 07:52:16,166 
(trainer:740) INFO: 28epoch:train:8701-8800batch: iter_time=3.492, forward_time=0.445, loss_ctc=44.445, loss_att=49.254, acc=0.749, loss=47.811, backward_time=0.318, grad_norm=36.746, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.904e-04, train_time=4.996 +[gpub058:0/16] 2024-02-05 07:54:27,442 (trainer:740) INFO: 28epoch:train:8801-8900batch: iter_time=8.767e-05, forward_time=0.293, loss_ctc=47.960, loss_att=55.367, acc=0.741, loss=53.145, backward_time=0.286, grad_norm=42.213, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.904e-04, train_time=1.312 +[gpub058:0/16] 2024-02-05 07:56:34,899 (trainer:740) INFO: 28epoch:train:8901-9000batch: iter_time=8.490e-05, forward_time=0.294, loss_ctc=55.230, loss_att=58.787, acc=0.745, loss=57.720, backward_time=0.286, grad_norm=56.142, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.904e-04, train_time=1.275 +[gpub058:0/16] 2024-02-05 07:59:47,429 (trainer:740) INFO: 28epoch:train:9001-9100batch: iter_time=9.611e-05, forward_time=0.360, loss_ctc=49.405, loss_att=47.431, acc=0.762, loss=48.023, backward_time=0.424, grad_norm=42.186, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.904e-04, train_time=1.925 +[gpub058:0/16] 2024-02-05 08:02:02,668 (trainer:740) INFO: 28epoch:train:9101-9200batch: iter_time=9.346e-05, forward_time=0.292, loss_ctc=50.639, loss_att=50.580, acc=0.758, loss=50.597, backward_time=0.288, grad_norm=41.308, clip=100.000, loss_scale=9.762e+33, optim_step_time=0.093, optim0_lr0=1.903e-04, train_time=1.352 +[gpub058:0/16] 2024-02-05 08:04:20,205 (trainer:740) INFO: 28epoch:train:9201-9300batch: iter_time=8.899e-05, forward_time=0.290, loss_ctc=47.575, loss_att=51.500, acc=0.745, loss=50.323, backward_time=0.286, grad_norm=40.521, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.903e-04, train_time=1.375 +[gpub058:0/16] 2024-02-05 08:07:10,631 (trainer:740) INFO: 
28epoch:train:9301-9400batch: iter_time=8.870e-05, forward_time=0.368, loss_ctc=45.053, loss_att=43.052, acc=0.758, loss=43.652, backward_time=0.389, grad_norm=42.331, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=1.903e-04, train_time=1.704 +[gpub058:0/16] 2024-02-05 08:09:27,104 (trainer:740) INFO: 28epoch:train:9401-9500batch: iter_time=8.732e-05, forward_time=0.288, loss_ctc=42.349, loss_att=37.160, acc=0.763, loss=38.717, backward_time=0.282, grad_norm=35.678, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.903e-04, train_time=1.365 +[gpub058:0/16] 2024-02-05 08:11:20,055 (trainer:740) INFO: 28epoch:train:9501-9600batch: iter_time=8.747e-05, forward_time=0.290, loss_ctc=45.117, loss_att=40.870, acc=0.759, loss=42.144, backward_time=0.285, grad_norm=36.530, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.902e-04, train_time=1.129 +[gpub058:0/16] 2024-02-05 08:13:42,429 (trainer:740) INFO: 28epoch:train:9601-9700batch: iter_time=8.279e-05, forward_time=0.344, loss_ctc=50.782, loss_att=46.573, acc=0.749, loss=47.836, backward_time=0.306, grad_norm=46.250, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=1.902e-04, train_time=1.423 +[gpub058:0/16] 2024-02-05 08:16:32,331 (trainer:740) INFO: 28epoch:train:9701-9800batch: iter_time=8.805e-05, forward_time=0.352, loss_ctc=47.468, loss_att=51.631, acc=0.749, loss=50.382, backward_time=0.323, grad_norm=36.442, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.902e-04, train_time=1.699 +[gpub058:0/16] 2024-02-05 08:18:50,802 (trainer:740) INFO: 28epoch:train:9801-9900batch: iter_time=8.796e-05, forward_time=0.292, loss_ctc=52.748, loss_att=55.006, acc=0.723, loss=54.329, backward_time=0.288, grad_norm=41.233, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.092, optim0_lr0=1.902e-04, train_time=1.384 +[gpub058:0/16] 2024-02-05 08:21:05,935 (trainer:740) INFO: 28epoch:train:9901-10000batch: 
iter_time=8.446e-05, forward_time=0.291, loss_ctc=49.165, loss_att=49.622, acc=0.759, loss=49.485, backward_time=0.286, grad_norm=35.866, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.901e-04, train_time=1.351 +[gpub058:0/16] 2024-02-05 08:21:25,959 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub058:0/16] 2024-02-05 08:21:45,221 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 08:21:48,947 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 08:21:48,947 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub058:0/16] 2024-02-05 08:21:48,951 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 08:27:38,359 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-05 08:29:21,898 (trainer:740) INFO: 28epoch:train:10001-10100batch: iter_time=3.558, forward_time=0.360, loss_ctc=44.849, loss_att=50.778, acc=0.753, loss=49.000, backward_time=0.298, grad_norm=37.827, clip=100.000, loss_scale=6.451e+33, optim_step_time=0.095, optim0_lr0=1.901e-04, train_time=4.956 +[gpub058:0/16] 2024-02-05 08:31:42,820 (trainer:740) INFO: 28epoch:train:10101-10200batch: iter_time=0.091, forward_time=0.291, loss_ctc=48.872, loss_att=51.216, acc=0.745, loss=50.512, backward_time=0.286, grad_norm=42.840, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.901e-04, train_time=1.413 +[gpub058:0/16] 2024-02-05 08:36:05,294 (trainer:740) INFO: 28epoch:train:10201-10300batch: iter_time=8.443e-05, forward_time=0.353, loss_ctc=53.102, loss_att=57.181, acc=0.755, loss=55.957, backward_time=0.314, grad_norm=53.233, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.901e-04, train_time=2.625 +[gpub058:0/16] 2024-02-05 08:39:53,439 (trainer:740) INFO: 28epoch:train:10301-10400batch: iter_time=8.631e-05, forward_time=0.369, loss_ctc=52.351, loss_att=48.703, acc=0.757, loss=49.798, backward_time=0.300, grad_norm=47.099, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.901e-04, train_time=2.278 +[gpub058:0/16] 2024-02-05 08:42:41,683 (trainer:740) INFO: 28epoch:train:10401-10500batch: iter_time=8.608e-05, forward_time=0.293, loss_ctc=50.715, loss_att=50.361, acc=0.752, loss=50.467, backward_time=0.286, grad_norm=38.194, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.900e-04, train_time=1.686 +[gpub058:0/16] 2024-02-05 08:45:00,715 (trainer:740) INFO: 28epoch:train:10501-10600batch: iter_time=9.029e-05, forward_time=0.301, loss_ctc=39.531, loss_att=37.727, acc=0.774, loss=38.268, backward_time=0.290, grad_norm=35.272, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.900e-04, train_time=1.390 +[gpub058:0/16] 2024-02-05 08:48:45,112 
(trainer:740) INFO: 28epoch:train:10601-10700batch: iter_time=0.300, forward_time=0.488, loss_ctc=46.156, loss_att=45.468, acc=0.743, loss=45.674, backward_time=0.333, grad_norm=42.037, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.118, optim0_lr0=1.900e-04, train_time=2.244 +[gpub058:0/16] 2024-02-05 08:51:12,615 (trainer:740) INFO: 28epoch:train:10701-10800batch: iter_time=8.762e-05, forward_time=0.293, loss_ctc=46.831, loss_att=41.301, acc=0.762, loss=42.960, backward_time=0.283, grad_norm=35.930, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.900e-04, train_time=1.475 +[gpub058:0/16] 2024-02-05 08:53:36,559 (trainer:740) INFO: 28epoch:train:10801-10900batch: iter_time=9.473e-05, forward_time=0.302, loss_ctc=50.998, loss_att=42.770, acc=0.758, loss=45.238, backward_time=0.304, grad_norm=42.087, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.899e-04, train_time=1.439 +[gpub058:0/16] 2024-02-05 08:56:11,695 (trainer:740) INFO: 28epoch:train:10901-11000batch: iter_time=8.786e-05, forward_time=0.370, loss_ctc=46.500, loss_att=50.408, acc=0.741, loss=49.236, backward_time=0.347, grad_norm=38.234, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.899e-04, train_time=1.551 +[gpub058:0/16] 2024-02-05 08:58:37,011 (trainer:740) INFO: 28epoch:train:11001-11100batch: iter_time=9.047e-05, forward_time=0.292, loss_ctc=46.479, loss_att=50.388, acc=0.741, loss=49.215, backward_time=0.287, grad_norm=37.639, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.092, optim0_lr0=1.899e-04, train_time=1.453 +[gpub058:0/16] 2024-02-05 09:01:05,036 (trainer:740) INFO: 28epoch:train:11101-11200batch: iter_time=2.680e-04, forward_time=0.424, loss_ctc=53.257, loss_att=52.092, acc=0.740, loss=52.441, backward_time=0.320, grad_norm=40.278, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.118, optim0_lr0=1.899e-04, train_time=1.479 +[gpub058:0/16] 2024-02-05 09:02:43,006 (multiple_iter_factory:32) INFO: 
Building 9th iter-factory... +[gpub058:0/16] 2024-02-05 09:03:01,989 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 09:03:05,635 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 09:03:05,635 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub058:0/16] 2024-02-05 09:03:05,683 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 09:10:08,961 (trainer:740) INFO: 28epoch:train:11201-11300batch: iter_time=3.899, forward_time=0.444, loss_ctc=44.754, loss_att=47.299, acc=0.757, loss=46.536, backward_time=0.310, grad_norm=36.952, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.899e-04, train_time=5.439 +[gpub058:0/16] 2024-02-05 09:12:47,675 (trainer:740) INFO: 28epoch:train:11301-11400batch: iter_time=8.967e-05, forward_time=0.467, loss_ctc=47.760, loss_att=52.960, acc=0.745, loss=51.400, backward_time=0.319, grad_norm=42.152, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.898e-04, train_time=1.588 +[gpub058:0/16] 2024-02-05 09:14:53,047 (trainer:740) INFO: 28epoch:train:11401-11500batch: iter_time=9.265e-05, forward_time=0.291, loss_ctc=55.532, loss_att=58.064, acc=0.747, loss=57.305, backward_time=0.286, grad_norm=54.059, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.898e-04, train_time=1.253 +[gpub058:0/16] 
2024-02-05 09:17:58,032 (trainer:740) INFO: 28epoch:train:11501-11600batch: iter_time=9.426e-05, forward_time=0.417, loss_ctc=49.647, loss_att=47.151, acc=0.761, loss=47.900, backward_time=0.418, grad_norm=43.821, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.898e-04, train_time=1.850 +[gpub058:0/16] 2024-02-05 09:20:24,804 (trainer:740) INFO: 28epoch:train:11601-11700batch: iter_time=0.001, forward_time=0.384, loss_ctc=50.656, loss_att=49.321, acc=0.761, loss=49.722, backward_time=0.408, grad_norm=40.757, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.126, optim0_lr0=1.898e-04, train_time=1.466 +[gpub058:0/16] 2024-02-05 09:23:37,611 (trainer:740) INFO: 28epoch:train:11701-11800batch: iter_time=9.082e-05, forward_time=0.386, loss_ctc=47.453, loss_att=50.427, acc=0.748, loss=49.534, backward_time=0.386, grad_norm=37.307, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.897e-04, train_time=1.929 +[gpub058:0/16] 2024-02-05 09:25:51,542 (trainer:740) INFO: 28epoch:train:11801-11900batch: iter_time=9.365e-05, forward_time=0.289, loss_ctc=44.825, loss_att=41.995, acc=0.761, loss=42.844, backward_time=0.284, grad_norm=42.431, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.897e-04, train_time=1.338 +[gpub058:0/16] 2024-02-05 09:28:04,099 (trainer:740) INFO: 28epoch:train:11901-12000batch: iter_time=8.632e-05, forward_time=0.288, loss_ctc=42.338, loss_att=37.128, acc=0.762, loss=38.691, backward_time=0.282, grad_norm=36.235, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.897e-04, train_time=1.327 +[gpub058:0/16] 2024-02-05 09:30:50,038 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub058:0/16] 2024-02-05 09:31:09,649 (trainer:740) INFO: 28epoch:train:12001-12100batch: iter_time=9.358e-05, forward_time=0.456, loss_ctc=44.752, loss_att=40.386, acc=0.762, loss=41.696, backward_time=0.350, grad_norm=36.871, clip=100.000, loss_scale=8.234e+33, optim_step_time=0.100, optim0_lr0=1.897e-04, train_time=1.854 +[gpub058:0/16] 2024-02-05 09:33:14,011 (trainer:740) INFO: 28epoch:train:12101-12200batch: iter_time=9.148e-05, forward_time=0.290, loss_ctc=50.223, loss_att=45.599, acc=0.750, loss=46.986, backward_time=0.286, grad_norm=43.121, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.896e-04, train_time=1.245 +[gpub058:0/16] 2024-02-05 09:36:11,510 (trainer:740) INFO: 28epoch:train:12201-12300batch: iter_time=8.023e-04, forward_time=0.446, loss_ctc=47.184, loss_att=51.018, acc=0.749, loss=49.868, backward_time=0.378, grad_norm=36.089, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.896e-04, train_time=1.773 +[gpub058:0/16] 2024-02-05 09:39:10,509 (trainer:740) INFO: 28epoch:train:12301-12400batch: iter_time=9.810e-05, forward_time=0.400, loss_ctc=52.417, loss_att=54.077, acc=0.725, loss=53.579, backward_time=0.350, grad_norm=40.881, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.896e-04, train_time=1.792 +[gpub058:0/16] 2024-02-05 09:41:14,230 (trainer:740) INFO: 28epoch:train:12401-12500batch: iter_time=8.423e-05, forward_time=0.292, loss_ctc=49.344, loss_att=49.569, acc=0.759, loss=49.502, backward_time=0.287, grad_norm=38.016, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.896e-04, train_time=1.233 +[gpub058:0/16] 2024-02-05 09:41:34,520 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub058:0/16] 2024-02-05 09:41:53,253 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 09:41:56,800 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 09:41:56,800 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub058:0/16] 2024-02-05 09:41:56,803 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 09:50:20,423 (trainer:740) INFO: 28epoch:train:12501-12600batch: iter_time=3.973, forward_time=0.376, loss_ctc=44.605, loss_att=50.092, acc=0.758, loss=48.446, backward_time=0.300, grad_norm=37.484, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.896e-04, train_time=5.465 +[gpub058:0/16] 2024-02-05 09:52:57,900 (trainer:740) INFO: 28epoch:train:12601-12700batch: iter_time=9.166e-05, forward_time=0.448, loss_ctc=48.549, loss_att=51.036, acc=0.746, loss=50.290, backward_time=0.355, grad_norm=40.535, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.895e-04, train_time=1.574 +[gpub058:0/16] 2024-02-05 09:55:29,942 (trainer:740) INFO: 28epoch:train:12701-12800batch: iter_time=8.938e-05, forward_time=0.293, loss_ctc=52.042, loss_att=56.945, acc=0.755, loss=55.474, backward_time=0.288, grad_norm=50.594, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.895e-04, train_time=1.521 +[gpub058:0/16] 2024-02-05 09:58:00,629 (trainer:740) 
INFO: 28epoch:train:12801-12900batch: iter_time=9.634e-05, forward_time=0.386, loss_ctc=51.752, loss_att=48.332, acc=0.759, loss=49.358, backward_time=0.362, grad_norm=45.688, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.895e-04, train_time=1.506 +[gpub058:0/16] 2024-02-05 10:00:25,535 (trainer:740) INFO: 28epoch:train:12901-13000batch: iter_time=9.595e-05, forward_time=0.292, loss_ctc=50.467, loss_att=49.587, acc=0.755, loss=49.851, backward_time=0.286, grad_norm=37.418, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.895e-04, train_time=1.449 +[gpub058:0/16] 2024-02-05 10:02:40,291 (trainer:740) INFO: 28epoch:train:13001-13100batch: iter_time=0.009, forward_time=0.461, loss_ctc=38.950, loss_att=37.502, acc=0.773, loss=37.936, backward_time=0.318, grad_norm=37.777, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.894e-04, train_time=1.347 +[gpub058:0/16] 2024-02-05 10:05:15,080 (trainer:740) INFO: 28epoch:train:13101-13200batch: iter_time=9.298e-05, forward_time=0.290, loss_ctc=46.132, loss_att=44.625, acc=0.746, loss=45.077, backward_time=0.289, grad_norm=42.104, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.894e-04, train_time=1.548 +[gpub058:0/16] 2024-02-05 10:08:18,481 (trainer:740) INFO: 28epoch:train:13201-13300batch: iter_time=6.383e-04, forward_time=0.448, loss_ctc=47.011, loss_att=41.156, acc=0.764, loss=42.913, backward_time=0.329, grad_norm=36.233, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.894e-04, train_time=1.833 +[gpub058:0/16] 2024-02-05 10:10:21,634 (trainer:740) INFO: 28epoch:train:13301-13400batch: iter_time=9.415e-05, forward_time=0.289, loss_ctc=50.811, loss_att=42.447, acc=0.760, loss=44.956, backward_time=0.284, grad_norm=37.929, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.894e-04, train_time=1.232 +[gpub058:0/16] 2024-02-05 10:13:15,675 (trainer:740) INFO: 
28epoch:train:13401-13500batch: iter_time=9.068e-05, forward_time=0.396, loss_ctc=46.159, loss_att=49.945, acc=0.744, loss=48.809, backward_time=0.358, grad_norm=36.826, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.894e-04, train_time=1.740 +[gpub058:0/16] 2024-02-05 10:15:15,353 (trainer:740) INFO: 28epoch:train:13501-13600batch: iter_time=9.482e-05, forward_time=0.292, loss_ctc=46.474, loss_att=49.744, acc=0.745, loss=48.763, backward_time=0.287, grad_norm=37.187, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.893e-04, train_time=1.194 +[gpub058:0/16] 2024-02-05 10:18:04,025 (trainer:740) INFO: 28epoch:train:13601-13700batch: iter_time=9.932e-05, forward_time=0.433, loss_ctc=52.672, loss_att=51.756, acc=0.742, loss=52.031, backward_time=0.310, grad_norm=39.878, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.893e-04, train_time=1.689 +[gpub058:0/16] 2024-02-05 10:19:36,253 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub058:0/16] 2024-02-05 10:19:55,687 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 10:19:59,306 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 10:19:59,306 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub058:0/16] 2024-02-05 10:19:59,328 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 10:27:20,718 (trainer:740) INFO: 28epoch:train:13701-13800batch: iter_time=4.104, forward_time=0.288, loss_ctc=44.016, loss_att=47.434, acc=0.759, loss=46.408, backward_time=0.283, grad_norm=36.547, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.893e-04, train_time=5.567 +[gpub058:0/16] 2024-02-05 10:30:13,961 (trainer:740) INFO: 28epoch:train:13801-13900batch: iter_time=7.582e-05, forward_time=0.292, loss_ctc=47.584, loss_att=52.231, acc=0.750, loss=50.837, backward_time=0.285, grad_norm=39.878, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.893e-04, train_time=1.732 +[gpub058:0/16] 2024-02-05 10:32:55,028 (trainer:740) INFO: 28epoch:train:13901-14000batch: iter_time=8.566e-05, forward_time=0.459, loss_ctc=54.711, loss_att=57.915, acc=0.747, loss=56.954, backward_time=0.352, grad_norm=53.239, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.892e-04, train_time=1.610 +[gpub058:0/16] 2024-02-05 10:35:24,905 (trainer:740) 
INFO: 28epoch:train:14001-14100batch: iter_time=8.682e-05, forward_time=0.291, loss_ctc=49.084, loss_att=45.796, acc=0.765, loss=46.782, backward_time=0.286, grad_norm=44.815, clip=100.000, loss_scale=6.023e+33, optim_step_time=0.093, optim0_lr0=1.892e-04, train_time=1.499 +[gpub058:0/16] 2024-02-05 10:37:42,647 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. +[gpub058:0/16] 2024-02-05 10:38:21,768 (trainer:740) INFO: 28epoch:train:14101-14200batch: iter_time=0.001, forward_time=0.469, loss_ctc=50.074, loss_att=49.123, acc=0.762, loss=49.408, backward_time=0.317, grad_norm=39.529, clip=100.000, loss_scale=1.012e+34, optim_step_time=0.110, optim0_lr0=1.892e-04, train_time=1.768 +[gpub058:0/16] 2024-02-05 10:40:45,617 (trainer:740) INFO: 28epoch:train:14201-14300batch: iter_time=8.466e-05, forward_time=0.292, loss_ctc=46.960, loss_att=50.880, acc=0.744, loss=49.704, backward_time=0.285, grad_norm=39.564, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.892e-04, train_time=1.439 +[gpub058:0/16] 2024-02-05 10:43:15,471 (trainer:740) INFO: 28epoch:train:14301-14400batch: iter_time=8.298e-05, forward_time=0.289, loss_ctc=45.064, loss_att=41.440, acc=0.762, loss=42.527, backward_time=0.282, grad_norm=40.669, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.891e-04, train_time=1.498 +[gpub058:0/16] 2024-02-05 10:46:11,931 (trainer:740) INFO: 28epoch:train:14401-14500batch: iter_time=9.149e-05, forward_time=0.521, loss_ctc=42.157, loss_att=36.650, acc=0.763, loss=38.302, backward_time=0.320, grad_norm=38.436, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.891e-04, train_time=1.764 +[gpub058:0/16] 2024-02-05 10:49:00,798 (trainer:740) INFO: 28epoch:train:14501-14600batch: iter_time=8.861e-05, forward_time=0.289, loss_ctc=44.508, loss_att=40.134, acc=0.763, loss=41.446, backward_time=0.282, grad_norm=34.705, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, 
optim0_lr0=1.891e-04, train_time=1.688 +[gpub058:0/16] 2024-02-05 10:51:19,202 (trainer:740) INFO: 28epoch:train:14601-14700batch: iter_time=2.418e-04, forward_time=0.408, loss_ctc=50.315, loss_att=45.149, acc=0.752, loss=46.699, backward_time=0.314, grad_norm=41.102, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.891e-04, train_time=1.384 +[gpub058:0/16] 2024-02-05 10:54:06,461 (trainer:740) INFO: 28epoch:train:14701-14800batch: iter_time=8.563e-05, forward_time=0.290, loss_ctc=47.082, loss_att=50.079, acc=0.752, loss=49.180, backward_time=0.286, grad_norm=36.997, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.891e-04, train_time=1.672 +[gpub058:0/16] 2024-02-05 10:56:46,141 (trainer:740) INFO: 28epoch:train:14801-14900batch: iter_time=8.588e-05, forward_time=0.292, loss_ctc=51.789, loss_att=52.983, acc=0.731, loss=52.625, backward_time=0.286, grad_norm=40.012, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.890e-04, train_time=1.597 +[gpub058:0/16] 2024-02-05 11:00:07,614 (trainer:740) INFO: 28epoch:train:14901-15000batch: iter_time=3.673e-04, forward_time=0.365, loss_ctc=48.712, loss_att=49.442, acc=0.759, loss=49.223, backward_time=0.440, grad_norm=36.158, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.890e-04, train_time=2.013 +[gpub058:0/16] 2024-02-05 11:40:06,981 (trainer:346) INFO: 28epoch results: [train] iter_time=0.290, forward_time=0.334, loss_ctc=48.866, loss_att=48.308, acc=0.747, loss=48.476, backward_time=0.308, grad_norm=41.482, clip=100.000, loss_scale=5.519e+33, optim_step_time=0.096, optim0_lr0=1.907e-04, train_time=1.771, time=7 hours, 23 minutes and 14.91 seconds, total_count=450000, gpu_max_cached_mem_GB=41.105, [valid] loss_ctc=38.769, cer_ctc=0.193, loss_att=40.854, acc=0.668, cer=0.355, wer=1.000, loss=40.229, time=39 minutes and 34.66 seconds, total_count=140130, gpu_max_cached_mem_GB=41.105 +[gpub058:0/16] 2024-02-05 11:40:17,869 
(trainer:394) INFO: The best model has been updated: valid.total_count +[gpub058:0/16] 2024-02-05 11:40:17,875 (trainer:275) INFO: 29/45epoch started. Estimated time to finish: 6 days, 8 hours and 49 minutes +[gpub058:0/16] 2024-02-05 11:40:17,886 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub058:0/16] 2024-02-05 11:40:35,703 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 11:40:39,131 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 11:40:39,132 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub058:0/16] 2024-02-05 11:40:39,135 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 11:48:27,862 (trainer:740) INFO: 29epoch:train:1-100batch: iter_time=3.704, forward_time=0.360, loss_ctc=45.736, loss_att=37.560, acc=0.779, loss=40.013, backward_time=0.294, grad_norm=40.355, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.890e-04, train_time=4.900 +[gpub058:0/16] 2024-02-05 11:50:29,479 (trainer:740) INFO: 29epoch:train:101-200batch: iter_time=8.141e-05, forward_time=0.290, loss_ctc=53.227, loss_att=50.924, acc=0.740, loss=51.615, backward_time=0.284, grad_norm=44.349, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.890e-04, train_time=1.216 +[gpub058:0/16] 2024-02-05 11:52:55,039 (trainer:740) INFO: 
29epoch:train:201-300batch: iter_time=8.428e-05, forward_time=0.293, loss_ctc=51.624, loss_att=49.611, acc=0.751, loss=50.215, backward_time=0.299, grad_norm=43.885, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.889e-04, train_time=1.453 +[gpub058:0/16] 2024-02-05 11:55:36,002 (trainer:740) INFO: 29epoch:train:301-400batch: iter_time=9.736e-05, forward_time=0.426, loss_ctc=61.337, loss_att=52.807, acc=0.731, loss=55.366, backward_time=0.347, grad_norm=54.220, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.889e-04, train_time=1.612 +[gpub058:0/16] 2024-02-05 11:57:56,423 (trainer:740) INFO: 29epoch:train:401-500batch: iter_time=9.799e-05, forward_time=0.289, loss_ctc=45.291, loss_att=47.243, acc=0.734, loss=46.657, backward_time=0.285, grad_norm=40.422, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.889e-04, train_time=1.404 +[gpub058:0/16] 2024-02-05 11:59:58,209 (trainer:740) INFO: 29epoch:train:501-600batch: iter_time=9.651e-05, forward_time=0.320, loss_ctc=48.554, loss_att=44.044, acc=0.744, loss=45.397, backward_time=0.292, grad_norm=44.114, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.889e-04, train_time=1.217 +[gpub058:0/16] 2024-02-05 12:02:18,327 (trainer:740) INFO: 29epoch:train:601-700batch: iter_time=9.003e-05, forward_time=0.406, loss_ctc=54.479, loss_att=48.799, acc=0.750, loss=50.503, backward_time=0.369, grad_norm=39.726, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.889e-04, train_time=1.402 +[gpub058:0/16] 2024-02-05 12:04:46,638 (trainer:740) INFO: 29epoch:train:701-800batch: iter_time=9.552e-05, forward_time=0.290, loss_ctc=53.371, loss_att=51.759, acc=0.733, loss=52.243, backward_time=0.284, grad_norm=40.872, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.888e-04, train_time=1.483 +[gpub058:0/16] 2024-02-05 12:06:56,305 (trainer:740) INFO: 29epoch:train:801-900batch: iter_time=9.018e-05, 
forward_time=0.332, loss_ctc=61.209, loss_att=53.019, acc=0.747, loss=55.476, backward_time=0.310, grad_norm=48.408, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.888e-04, train_time=1.296 +[gpub058:0/16] 2024-02-05 12:09:44,872 (trainer:740) INFO: 29epoch:train:901-1000batch: iter_time=9.521e-05, forward_time=0.338, loss_ctc=46.789, loss_att=42.506, acc=0.749, loss=43.791, backward_time=0.333, grad_norm=35.252, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.888e-04, train_time=1.686 +[gpub058:0/16] 2024-02-05 12:11:59,651 (trainer:740) INFO: 29epoch:train:1001-1100batch: iter_time=9.519e-05, forward_time=0.290, loss_ctc=47.823, loss_att=46.240, acc=0.755, loss=46.715, backward_time=0.284, grad_norm=38.391, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.888e-04, train_time=1.348 +[gpub058:0/16] 2024-02-05 12:14:31,644 (trainer:740) INFO: 29epoch:train:1101-1200batch: iter_time=9.070e-05, forward_time=0.435, loss_ctc=51.511, loss_att=50.112, acc=0.741, loss=50.532, backward_time=0.346, grad_norm=55.198, clip=100.000, loss_scale=5.452e+33, optim_step_time=0.102, optim0_lr0=1.887e-04, train_time=1.519 +[gpub058:0/16] 2024-02-05 12:16:07,416 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+ +gpub059:2626467:2626539 [1] proxy.cc:1059 NCCL WARN [Proxy Service] Poll failed with error 1 +[gpub058:0/16] 2024-02-05 12:16:26,842 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub058:0/16] 2024-02-05 12:16:30,431 (abs_task:1616) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub058:0/16] 2024-02-05 12:16:30,431 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub058:0/16] 2024-02-05 12:16:30,505 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub058:0/16] 2024-02-05 12:23:23,544 (trainer:740) INFO: 29epoch:train:1201-1300batch: iter_time=3.955, forward_time=0.301, loss_ctc=42.756, loss_att=44.642, acc=0.751, loss=44.076, backward_time=0.285, grad_norm=36.088, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.887e-04, train_time=5.319 +[gpub058:0/16] 2024-02-05 12:25:57,116 (trainer:740) INFO: 29epoch:train:1301-1400batch: iter_time=8.834e-05, forward_time=0.429, loss_ctc=46.583, loss_att=43.040, acc=0.757, loss=44.103, backward_time=0.358, grad_norm=40.776, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=1.887e-04, train_time=1.535 +[gpub058:0/16] 2024-02-05 12:28:02,781 (trainer:740) INFO: 29epoch:train:1401-1500batch: iter_time=8.676e-05, forward_time=0.293, loss_ctc=57.213, loss_att=52.280, acc=0.748, loss=53.760, backward_time=0.286, grad_norm=47.402, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, 
optim0_lr0=1.887e-04, train_time=1.256 +[gpub058:0/16] 2024-02-05 12:30:16,651 (trainer:740) INFO: 29epoch:train:1501-1600batch: iter_time=9.515e-05, forward_time=0.303, loss_ctc=48.562, loss_att=44.553, acc=0.754, loss=45.756, backward_time=0.290, grad_norm=42.866, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.887e-04, train_time=1.339 +[gpub058:0/16] 2024-02-05 12:32:55,225 (trainer:740) INFO: 29epoch:train:1601-1700batch: iter_time=9.610e-05, forward_time=0.352, loss_ctc=57.465, loss_att=53.993, acc=0.724, loss=55.034, backward_time=0.302, grad_norm=49.336, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.886e-04, train_time=1.586 +[gpub058:0/16] 2024-02-05 12:34:10,026 (trainer:671) WARNING: The grad norm is nan. Skipping updating the model. +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2938841.0 ON gpub058 CANCELLED AT 2024-02-05T12:34:53 *** diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log new file mode 100644 index 0000000000000000000000000000000000000000..2ea517431109d1d3a24f257b92d73ebf06484d7a --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log @@ -0,0 +1,2927 @@ +# Running on gpub010.delta.ncsa.illinois.edu +# Started at Sat Feb 10 11:47:58 CST 2024 +# SLURMD_NODENAME=gpub010 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2968474 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x4)' +# SLURM_JOB_END_TIME=1707760063 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2968474 +# 
SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[010,037,076,082]' +# SLURM_JOB_NUM_NODES=4 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1707587263 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=4 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[010,037,076,082]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x4)' +# SLURM_TASK_PID=2415375 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub010 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_01501f2e-ee88-4157-a837-1cdd946f45c6 +GpuFreq=control_disabled +GpuFreq=control_disabled +GpuFreq=control_disabled +GpuFreq=control_disabled +/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config 
conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_sh/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_sh/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_sh/scratch/bbjs/peng6/espnet-owsm-train-20240205/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-train-20240205/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_01501f2e-ee88-4157-a837-1cdd946f45c6 +ape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --mulape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_01501f2e-ee88-4157-a837-1cdd946f45c6 +ape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_01501f2e-ee88-4157-a837-1cdd946f45c6 +tiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-train-20240205/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_01501f2e-ee88-4157-a837-1cdd946f45c6 +[gpub010:0/16] 2024-02-10 11:52:06,129 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub010:0/16] 2024-02-10 11:52:06,297 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 16 nodes. 
+[gpub010:0/16] 2024-02-10 11:52:06,382 (s2t:464) INFO: Vocabulary size: 50002 +[gpub010:0/16] 2024-02-10 11:52:15,378 (abs_task:1271) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub010:0/16] 2024-02-10 11:52:15,384 (abs_task:1272) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=4, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 768, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=14592, out_features=768, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), 
eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, 
elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + 
(conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): 
Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=768, out_features=3072, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((1536,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (act): Identity() + (dropout): Dropout(p=0.1, 
inplace=False) + ) + (channel_proj2): Linear(in_features=1536, out_features=768, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(1536, 1536, kernel_size=(31,), stride=(1,), padding=(15,), groups=1536) + (merge_proj): Linear(in_features=1536, out_features=768, bias=True) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 768) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=768, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + 
(norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, 
out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): 
PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + 
(linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=768, out_features=768, bias=True) + (linear_k): Linear(in_features=768, out_features=768, bias=True) + (linear_v): Linear(in_features=768, out_features=768, bias=True) + (linear_out): Linear(in_features=768, out_features=768, bias=True) + (dropout): Identity() + (q_norm): Identity() + (k_norm): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=768, out_features=3072, bias=True) + (w_2): Linear(in_features=3072, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=768, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 366.65 M + Number of trainable parameters: 366.65 M (100.0%) + Size: 1.47 GB + Type: torch.float32 +[gpub010:0/16] 2024-02-10 11:52:15,385 (abs_task:1275) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0005 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub010:0/16] 2024-02-10 11:52:15,385 (abs_task:1276) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0005]) +[gpub010:0/16] 2024-02-10 11:52:15,393 (abs_task:1285) INFO: Saving the configuration in 
exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +[gpub010:0/16] 2024-02-10 11:52:20,873 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 11:52:21,776 (abs_task:1663) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 11:52:21,776 (abs_task:1664) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub010:0/16] 2024-02-10 11:52:21,777 (abs_task:1665) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 11:52:32,362 (trainer:168) INFO: The training was resumed using exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/checkpoint.pth +gpub010:2415458:2415458 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:2415458:2415458 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub010:2415458:2415458 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub010:2415458:2415458 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub010:0/16] 2024-02-10 11:52:37,461 (trainer:302) INFO: 40/45epoch started +[gpub010:0/16] 2024-02-10 11:52:37,515 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-02-10 11:52:55,705 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 11:52:59,362 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 11:52:59,363 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-02-10 11:52:59,366 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub037:2479731:2479731 [0] NCCL INFO cudaDriverVersion 12020 +gpub037:2479731:2479731 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:2479731:2479731 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub037:2479731:2479731 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub037:2479731:2479783 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub037:2479731:2479783 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub037:2479731:2479783 [0] NCCL INFO Using network AWS Libfabric +gpub037:2479731:2479783 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub037:2479731:2479783 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub037:2479731:2479783 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub037:2479731:2479783 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub037:2479731:2479783 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub037:2479731:2479783 [0] NCCL INFO Connected all rings +gpub037:2479731:2479783 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub037:2479731:2479783 [0] NCCL INFO Connected all trees +gpub037:2479731:2479783 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub037:2479731:2479783 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:2479731:2479783 [0] NCCL INFO comm 0x556289adbe40 rank 4 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:2415461:2415461 [3] NCCL INFO cudaDriverVersion 12020 +gpub010:2415461:2415461 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:2415461:2415461 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub010:2415461:2415461 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). 
+gpub010:2415461:2415512 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub010:2415461:2415512 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub010:2415461:2415512 [3] NCCL INFO Using network AWS Libfabric +gpub010:2415461:2415512 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub010:2415461:2415512 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub010:2415461:2415512 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub010:2415461:2415512 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub010:2415461:2415512 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub010:2415461:2415512 [3] NCCL INFO Connected all rings +gpub010:2415461:2415512 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub010:2415461:2415512 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub010:2415461:2415512 [3] NCCL INFO Connected all trees +gpub010:2415461:2415512 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:2415461:2415512 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:2415461:2415512 [3] NCCL INFO comm 0x56066bfecd40 rank 3 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub010:2415459:2415459 [1] NCCL INFO cudaDriverVersion 12020 +gpub010:2415459:2415459 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:2415459:2415459 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub010:2415459:2415459 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub010:2415459:2415511 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub010:2415459:2415511 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub010:2415459:2415511 [1] NCCL INFO Using network AWS Libfabric +gpub010:2415459:2415511 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub010:2415459:2415511 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub010:2415459:2415511 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub010:2415459:2415511 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub010:2415459:2415511 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub010:2415459:2415511 [1] NCCL INFO Connected all rings +gpub010:2415459:2415511 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub010:2415459:2415511 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub010:2415459:2415511 [1] NCCL INFO Connected all trees +gpub010:2415459:2415511 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:2415459:2415511 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:2415459:2415511 [1] NCCL INFO comm 0x5576d44db4b0 rank 1 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub037:2479733:2479733 [2] NCCL INFO cudaDriverVersion 12020 +gpub037:2479733:2479733 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:2479733:2479733 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub037:2479733:2479733 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub037:2479733:2479782 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub037:2479733:2479782 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub037:2479733:2479782 [2] NCCL INFO Using network AWS Libfabric +gpub037:2479733:2479782 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub037:2479733:2479782 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub037:2479733:2479782 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub037:2479733:2479782 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub037:2479733:2479782 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub037:2479733:2479782 [2] NCCL INFO Connected all rings +gpub037:2479733:2479782 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub037:2479733:2479782 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub037:2479733:2479782 [2] NCCL INFO Connected all trees +gpub037:2479733:2479782 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub037:2479733:2479782 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:2479733:2479782 [2] NCCL INFO comm 0x55c8aa09ba70 rank 6 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub037:2479732:2479732 [1] NCCL INFO cudaDriverVersion 12020 +gpub037:2479732:2479732 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:2479732:2479732 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub037:2479732:2479732 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub037:2479732:2479784 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub037:2479732:2479784 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub037:2479732:2479784 [1] NCCL INFO Using network AWS Libfabric +gpub037:2479732:2479784 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub037:2479732:2479784 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub037:2479732:2479784 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub037:2479732:2479784 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub037:2479732:2479784 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub010:2415460:2415460 [2] NCCL INFO cudaDriverVersion 12020 +gpub010:2415460:2415460 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:2415460:2415460 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub010:2415460:2415460 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub010:2415460:2415513 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub010:2415460:2415513 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub010:2415460:2415513 [2] NCCL INFO Using network AWS Libfabric +gpub010:2415460:2415513 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub010:2415460:2415513 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub010:2415460:2415513 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub010:2415460:2415513 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub010:2415460:2415513 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub037:2479732:2479784 [1] NCCL INFO Connected all rings +gpub037:2479732:2479784 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub037:2479732:2479784 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/AWS Libfabric/1 +gpub037:2479732:2479784 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub037:2479732:2479784 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub037:2479732:2479784 [1] NCCL INFO Connected all trees +gpub037:2479732:2479784 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub037:2479732:2479784 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:2479732:2479784 [1] NCCL INFO comm 0x5641d0bbff50 rank 5 nranks 16 
cudaDev 1 busId 46000 - Init COMPLETE +gpub010:2415460:2415513 [2] NCCL INFO Connected all rings +gpub010:2415460:2415513 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub010:2415460:2415513 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub010:2415460:2415513 [2] NCCL INFO Connected all trees +gpub010:2415460:2415513 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:2415460:2415513 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:2415460:2415513 [2] NCCL INFO comm 0x560007013060 rank 2 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub082:3674493:3674493 [0] NCCL INFO cudaDriverVersion 12020 +gpub082:3674493:3674493 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:3674493:3674493 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub082:3674493:3674493 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub082:3674493:3674547 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub082:3674493:3674547 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub082:3674493:3674547 [0] NCCL INFO Using network AWS Libfabric +gpub082:3674493:3674547 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub082:3674493:3674547 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. +gpub082:3674493:3674547 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->-1 +gpub082:3674493:3674547 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub010:2415458:2415510 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub010:2415458:2415510 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub010:2415458:2415510 [0] NCCL INFO Using network AWS Libfabric +gpub010:2415458:2415510 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub010:2415458:2415510 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub010:2415458:2415510 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub010:2415458:2415510 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +gpub010:2415458:2415510 [0] NCCL INFO Trees [0] 1/8/-1->0->-1 [1] 1/-1/-1->0->4 +gpub010:2415458:2415510 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub010:2415458:2415510 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub010:2415458:2415510 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub082:3674493:3674547 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub082:3674493:3674547 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub082:3674493:3674547 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub082:3674493:3674547 [0] NCCL INFO Connected all rings +gpub082:3674493:3674547 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub082:3674493:3674547 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 +gpub082:3674493:3674547 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub082:3674493:3674547 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub082:3674493:3674547 [0] NCCL INFO Connected all trees +gpub082:3674493:3674547 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub082:3674493:3674547 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:2415458:2415510 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub010:2415458:2415510 [0] NCCL INFO Connected all rings +gpub010:2415458:2415510 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub010:2415458:2415510 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub010:2415458:2415510 [0] NCCL INFO Channel 00/0 : 
0[7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub010:2415458:2415510 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 +gpub010:2415458:2415510 [0] NCCL INFO Connected all trees +gpub010:2415458:2415510 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub010:2415458:2415510 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:2415458:2415510 [0] NCCL INFO comm 0x5627bbb736f0 rank 0 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub082:3674493:3674547 [0] NCCL INFO comm 0x5595c37939f0 rank 12 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +gpub082:3674496:3674496 [3] NCCL INFO cudaDriverVersion 12020 +gpub082:3674496:3674496 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:3674496:3674496 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub082:3674496:3674496 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub082:3674496:3674548 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub082:3674496:3674548 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub082:3674496:3674548 [3] NCCL INFO Using network AWS Libfabric +gpub082:3674496:3674548 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub082:3674496:3674548 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub082:3674496:3674548 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub082:3674496:3674548 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub082:3674496:3674548 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub082:3674496:3674548 [3] NCCL INFO Connected all rings +gpub082:3674496:3674548 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub082:3674496:3674548 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub082:3674496:3674548 [3] NCCL INFO Connected all trees +gpub082:3674496:3674548 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub082:3674496:3674548 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:3674496:3674548 [3] NCCL INFO comm 0x55cb4fbfa630 rank 15 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub076:3268963:3268963 [3] NCCL INFO cudaDriverVersion 12020 +gpub076:3268963:3268963 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3268963:3268963 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3268963:3268963 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub076:3268963:3269014 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3268963:3269014 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3268963:3269014 [3] NCCL INFO Using network AWS Libfabric +gpub076:3268963:3269014 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub076:3268963:3269014 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub076:3268963:3269014 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub076:3268963:3269014 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub076:3268963:3269014 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub076:3268963:3269014 [3] NCCL INFO Connected all rings +gpub076:3268963:3269014 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub076:3268963:3269014 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub076:3268963:3269014 [3] NCCL INFO Connected all trees +gpub076:3268963:3269014 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3268963:3269014 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub076:3268963:3269014 [3] NCCL INFO comm 0x56218608af50 rank 11 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub082:3674494:3674494 [1] NCCL INFO cudaDriverVersion 12020 +gpub082:3674494:3674494 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:3674494:3674494 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub082:3674494:3674494 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub082:3674494:3674545 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub082:3674494:3674545 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub082:3674494:3674545 [1] NCCL INFO Using network AWS Libfabric +gpub082:3674494:3674545 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub082:3674494:3674545 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub082:3674494:3674545 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/-1/-1->13->12 +gpub082:3674494:3674545 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub082:3674494:3674545 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub082:3674494:3674545 [1] NCCL INFO Connected all rings +gpub082:3674494:3674545 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub082:3674494:3674545 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub082:3674494:3674545 [1] NCCL INFO Connected all trees +gpub082:3674494:3674545 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub082:3674494:3674545 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:3674494:3674545 [1] NCCL INFO comm 0x5605a17f6130 rank 13 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub082:3674495:3674495 [2] NCCL INFO cudaDriverVersion 12020 +gpub082:3674495:3674495 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:3674495:3674495 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub082:3674495:3674495 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub082:3674495:3674546 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub082:3674495:3674546 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub082:3674495:3674546 [2] NCCL INFO Using network AWS Libfabric +gpub082:3674495:3674546 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub082:3674495:3674546 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub082:3674495:3674546 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub082:3674495:3674546 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub082:3674495:3674546 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub082:3674495:3674546 [2] NCCL INFO Connected all rings +gpub082:3674495:3674546 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub082:3674495:3674546 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub082:3674495:3674546 [2] NCCL INFO Connected all trees +gpub082:3674495:3674546 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub082:3674495:3674546 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:3674495:3674546 [2] NCCL INFO comm 0x556db2db58a0 rank 14 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub037:2479734:2479734 [3] NCCL INFO cudaDriverVersion 12020 +gpub037:2479734:2479734 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:2479734:2479734 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub037:2479734:2479734 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub037:2479734:2479781 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub037:2479734:2479781 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub037:2479734:2479781 [3] NCCL INFO Using network AWS Libfabric +gpub037:2479734:2479781 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub037:2479734:2479781 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub037:2479734:2479781 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub037:2479734:2479781 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub037:2479734:2479781 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 +gpub037:2479734:2479781 [3] NCCL INFO Connected all rings +gpub037:2479734:2479781 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub037:2479734:2479781 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub037:2479734:2479781 [3] NCCL INFO Connected all trees +gpub037:2479734:2479781 [3] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub037:2479734:2479781 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:2479734:2479781 [3] NCCL INFO comm 0x560fe900a3e0 rank 7 nranks 16 cudaDev 3 busId c7000 - Init COMPLETE +gpub076:3268961:3268961 [1] NCCL INFO cudaDriverVersion 12020 +gpub076:3268961:3268961 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3268961:3268961 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3268961:3268961 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub076:3268961:3269015 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3268961:3269015 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3268961:3269015 [1] NCCL INFO Using network AWS Libfabric +gpub076:3268961:3269015 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub076:3268961:3269015 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub076:3268961:3269015 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub076:3268961:3269015 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub076:3268961:3269015 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub076:3268961:3269015 [1] NCCL INFO Connected all rings +gpub076:3268961:3269015 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/AWS Libfabric/1 +gpub076:3268961:3269015 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/AWS Libfabric/1 +gpub076:3268961:3269015 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub076:3268961:3269015 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub076:3268961:3269015 [1] NCCL INFO Connected all trees +gpub076:3268961:3269015 [1] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3268961:3269015 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub076:3268961:3269015 [1] NCCL INFO comm 0x558968564390 rank 9 nranks 16 cudaDev 1 busId 46000 - Init COMPLETE +gpub076:3268962:3268962 [2] NCCL INFO cudaDriverVersion 12020 +gpub076:3268962:3268962 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3268962:3268962 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3268962:3268962 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub076:3268962:3269013 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3268962:3269013 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3268962:3269013 [2] NCCL INFO Using network AWS Libfabric +gpub076:3268962:3269013 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub076:3268962:3269013 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub076:3268962:3269013 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub076:3268962:3269013 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub076:3268962:3269013 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub076:3268962:3269013 [2] NCCL INFO Connected all rings +gpub076:3268962:3269013 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub076:3268962:3269013 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub076:3268962:3269013 [2] NCCL INFO Connected all trees +gpub076:3268962:3269013 [2] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3268962:3269013 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub076:3268962:3269013 [2] NCCL INFO comm 0x561f7f19bb70 rank 10 nranks 16 cudaDev 2 busId 85000 - Init COMPLETE +gpub076:3268960:3268960 [0] NCCL INFO cudaDriverVersion 12020 +gpub076:3268960:3268960 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0> +gpub076:3268960:3268960 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. +gpub076:3268960:3268960 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). +gpub076:3268960:3269012 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 +gpub076:3268960:3269012 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) +gpub076:3268960:3269012 [0] NCCL INFO Using network AWS Libfabric +gpub076:3268960:3269012 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub076:3268960:3269012 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. 
+gpub076:3268960:3269012 [0] NCCL INFO Trees [0] 9/12/-1->8->0 [1] 9/-1/-1->8->5 +gpub076:3268960:3269012 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub076:3268960:3269012 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub076:3268960:3269012 [0] NCCL INFO Connected all rings +gpub076:3268960:3269012 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Channel 00/0 : 0[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Channel 00/0 : 8[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/AWS Libfabric/1 +gpub076:3268960:3269012 [0] NCCL INFO Connected all trees +gpub076:3268960:3269012 [0] NCCL INFO threadThresholds 8/8/64 | 128/8/64 | 512 | 512 +gpub076:3268960:3269012 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub076:3268960:3269012 [0] NCCL INFO comm 0x55f0b7620d20 rank 8 nranks 16 cudaDev 0 busId 7000 - Init COMPLETE +[gpub010:0/16] 2024-02-10 11:58:55,244 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub010:0/16] 2024-02-10 12:00:43,387 (trainer:762) INFO: 40epoch:train:1-100batch: iter_time=1.260, forward_time=0.398, loss_ctc=47.901, loss_att=39.871, acc=0.778, loss=42.280, backward_time=0.310, grad_norm=43.063, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.601e-04, train_time=4.855 +[gpub010:0/16] 2024-02-10 12:02:43,424 (trainer:762) INFO: 40epoch:train:101-200batch: iter_time=8.535e-05, forward_time=0.312, loss_ctc=44.696, loss_att=49.824, acc=0.743, loss=48.286, backward_time=0.318, grad_norm=52.937, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.601e-04, train_time=1.204 +[gpub010:0/16] 2024-02-10 12:04:53,197 (trainer:762) INFO: 40epoch:train:201-300batch: iter_time=8.491e-05, forward_time=0.330, loss_ctc=39.988, loss_att=35.494, acc=0.781, loss=36.842, backward_time=0.314, grad_norm=38.837, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.601e-04, train_time=1.298 +[gpub010:0/16] 2024-02-10 12:07:03,306 (trainer:762) INFO: 40epoch:train:301-400batch: iter_time=8.791e-05, forward_time=0.334, loss_ctc=57.195, loss_att=55.241, acc=0.736, loss=55.828, backward_time=0.309, grad_norm=56.273, clip=100.000, loss_scale=6.023e+33, optim_step_time=0.095, optim0_lr0=1.601e-04, train_time=1.301 +[gpub010:0/16] 2024-02-10 12:09:16,703 (trainer:762) INFO: 40epoch:train:401-500batch: iter_time=9.050e-05, forward_time=0.293, loss_ctc=51.568, loss_att=50.811, acc=0.741, loss=51.038, backward_time=0.297, grad_norm=82.978, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.601e-04, train_time=1.334 +[gpub010:0/16] 2024-02-10 12:11:21,822 (trainer:762) INFO: 40epoch:train:501-600batch: iter_time=9.397e-05, forward_time=0.352, loss_ctc=46.301, loss_att=44.629, acc=0.786, loss=45.131, backward_time=0.319, grad_norm=38.650, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.601e-04, train_time=1.251 +[gpub010:0/16] 2024-02-10 12:13:26,807 (trainer:762) INFO: 
40epoch:train:601-700batch: iter_time=2.602e-04, forward_time=0.346, loss_ctc=50.860, loss_att=49.494, acc=0.761, loss=49.904, backward_time=0.318, grad_norm=49.514, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.601e-04, train_time=1.250 +[gpub010:0/16] 2024-02-10 12:15:24,808 (trainer:762) INFO: 40epoch:train:701-800batch: iter_time=9.262e-05, forward_time=0.321, loss_ctc=45.788, loss_att=45.309, acc=0.763, loss=45.453, backward_time=0.305, grad_norm=43.529, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.600e-04, train_time=1.180 +[gpub010:0/16] 2024-02-10 12:17:56,775 (trainer:762) INFO: 40epoch:train:801-900batch: iter_time=9.242e-05, forward_time=0.318, loss_ctc=45.886, loss_att=41.052, acc=0.769, loss=42.502, backward_time=0.309, grad_norm=40.589, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.600e-04, train_time=1.520 +[gpub010:0/16] 2024-02-10 12:20:02,489 (trainer:762) INFO: 40epoch:train:901-1000batch: iter_time=2.359e-04, forward_time=0.330, loss_ctc=54.152, loss_att=54.472, acc=0.743, loss=54.376, backward_time=0.314, grad_norm=56.519, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.109, optim0_lr0=1.600e-04, train_time=1.257 +[gpub010:0/16] 2024-02-10 12:22:25,406 (trainer:762) INFO: 40epoch:train:1001-1100batch: iter_time=8.610e-05, forward_time=0.296, loss_ctc=51.097, loss_att=48.635, acc=0.758, loss=49.374, backward_time=0.300, grad_norm=49.692, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.600e-04, train_time=1.429 +[gpub010:0/16] 2024-02-10 12:24:37,323 (trainer:762) INFO: 40epoch:train:1101-1200batch: iter_time=8.466e-05, forward_time=0.347, loss_ctc=49.612, loss_att=47.865, acc=0.759, loss=48.389, backward_time=0.311, grad_norm=43.913, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.600e-04, train_time=1.318 +[gpub010:0/16] 2024-02-10 12:26:02,634 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub010:0/16] 2024-02-10 12:26:21,405 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 12:26:24,841 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 12:26:24,841 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-02-10 12:26:24,889 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 12:32:54,774 (trainer:762) INFO: 40epoch:train:1201-1300batch: iter_time=3.673, forward_time=0.365, loss_ctc=43.073, loss_att=40.587, acc=0.772, loss=41.333, backward_time=0.307, grad_norm=40.794, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.600e-04, train_time=4.974 +[gpub010:0/16] 2024-02-10 12:34:55,715 (trainer:762) INFO: 40epoch:train:1301-1400batch: iter_time=2.625e-04, forward_time=0.309, loss_ctc=45.773, loss_att=47.790, acc=0.754, loss=47.185, backward_time=0.303, grad_norm=45.002, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.600e-04, train_time=1.210 +[gpub010:0/16] 2024-02-10 12:37:25,511 (trainer:762) INFO: 40epoch:train:1401-1500batch: iter_time=8.048e-05, forward_time=0.340, loss_ctc=41.423, loss_att=40.243, acc=0.755, loss=40.597, backward_time=0.348, grad_norm=48.436, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.599e-04, train_time=1.498 +[gpub010:0/16] 2024-02-10 12:39:20,998 (trainer:762) INFO: 
40epoch:train:1501-1600batch: iter_time=8.306e-05, forward_time=0.288, loss_ctc=42.795, loss_att=40.155, acc=0.761, loss=40.947, backward_time=0.295, grad_norm=41.239, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.599e-04, train_time=1.155 +[gpub010:0/16] 2024-02-10 12:41:41,703 (trainer:762) INFO: 40epoch:train:1601-1700batch: iter_time=2.731e-04, forward_time=0.379, loss_ctc=48.970, loss_att=51.471, acc=0.746, loss=50.721, backward_time=0.321, grad_norm=51.183, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.599e-04, train_time=1.406 +[gpub010:0/16] 2024-02-10 12:43:47,392 (trainer:762) INFO: 40epoch:train:1701-1800batch: iter_time=8.992e-05, forward_time=0.290, loss_ctc=53.638, loss_att=47.475, acc=0.748, loss=49.324, backward_time=0.297, grad_norm=48.194, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.599e-04, train_time=1.258 +[gpub010:0/16] 2024-02-10 12:46:14,131 (trainer:762) INFO: 40epoch:train:1801-1900batch: iter_time=8.856e-05, forward_time=0.361, loss_ctc=49.602, loss_att=48.341, acc=0.766, loss=48.719, backward_time=0.349, grad_norm=43.906, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.599e-04, train_time=1.468 +[gpub010:0/16] 2024-02-10 12:47:15,171 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-10 12:48:12,268 (trainer:762) INFO: 40epoch:train:1901-2000batch: iter_time=8.882e-05, forward_time=0.290, loss_ctc=43.860, loss_att=42.668, acc=0.768, loss=43.025, backward_time=0.296, grad_norm=37.385, clip=100.000, loss_scale=7.762e+33, optim_step_time=0.094, optim0_lr0=1.599e-04, train_time=1.181 +[gpub010:0/16] 2024-02-10 12:50:18,148 (trainer:762) INFO: 40epoch:train:2001-2100batch: iter_time=8.596e-05, forward_time=0.290, loss_ctc=44.531, loss_att=39.965, acc=0.770, loss=41.334, backward_time=0.296, grad_norm=42.178, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.599e-04, train_time=1.259 +[gpub010:0/16] 2024-02-10 12:52:32,393 (trainer:762) INFO: 40epoch:train:2101-2200batch: iter_time=7.192e-04, forward_time=0.391, loss_ctc=47.893, loss_att=45.931, acc=0.758, loss=46.519, backward_time=0.337, grad_norm=44.277, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.599e-04, train_time=1.343 +[gpub010:0/16] 2024-02-10 12:54:43,724 (trainer:762) INFO: 40epoch:train:2201-2300batch: iter_time=8.314e-05, forward_time=0.290, loss_ctc=51.415, loss_att=49.611, acc=0.739, loss=50.152, backward_time=0.294, grad_norm=56.291, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.598e-04, train_time=1.312 +[gpub010:0/16] 2024-02-10 12:56:55,187 (trainer:762) INFO: 40epoch:train:2301-2400batch: iter_time=1.920e-04, forward_time=0.401, loss_ctc=51.454, loss_att=51.156, acc=0.753, loss=51.246, backward_time=0.337, grad_norm=44.661, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.598e-04, train_time=1.315 +[gpub010:0/16] 2024-02-10 12:59:02,167 (trainer:762) INFO: 40epoch:train:2401-2500batch: iter_time=8.054e-05, forward_time=0.291, loss_ctc=43.977, loss_att=44.243, acc=0.766, loss=44.163, backward_time=0.296, grad_norm=41.113, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.598e-04, train_time=1.270 +[gpub010:0/16] 2024-02-10 12:59:22,233 
(multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub010:0/16] 2024-02-10 12:59:41,691 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 12:59:45,101 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 12:59:45,101 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-02-10 12:59:45,143 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 13:06:42,136 (trainer:762) INFO: 40epoch:train:2501-2600batch: iter_time=3.083, forward_time=0.289, loss_ctc=45.848, loss_att=37.571, acc=0.778, loss=40.054, backward_time=0.294, grad_norm=41.291, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.598e-04, train_time=4.599 +[gpub010:0/16] 2024-02-10 13:09:14,573 (trainer:762) INFO: 40epoch:train:2601-2700batch: iter_time=7.959e-05, forward_time=0.406, loss_ctc=42.702, loss_att=45.976, acc=0.742, loss=44.994, backward_time=0.322, grad_norm=47.259, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.598e-04, train_time=1.524 +[gpub010:0/16] 2024-02-10 13:11:46,961 (trainer:762) INFO: 40epoch:train:2701-2800batch: iter_time=8.438e-05, forward_time=0.288, loss_ctc=39.356, loss_att=34.477, acc=0.782, loss=35.941, backward_time=0.290, grad_norm=37.401, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.598e-04, 
train_time=1.524 +[gpub010:0/16] 2024-02-10 13:14:06,104 (trainer:762) INFO: 40epoch:train:2801-2900batch: iter_time=8.348e-05, forward_time=0.290, loss_ctc=53.434, loss_att=54.241, acc=0.733, loss=53.999, backward_time=0.295, grad_norm=52.235, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.598e-04, train_time=1.391 +[gpub010:0/16] 2024-02-10 13:16:37,986 (trainer:762) INFO: 40epoch:train:2901-3000batch: iter_time=2.428e-04, forward_time=0.403, loss_ctc=49.202, loss_att=47.599, acc=0.744, loss=48.080, backward_time=0.334, grad_norm=49.100, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.597e-04, train_time=1.518 +[gpub010:0/16] 2024-02-10 13:18:51,861 (trainer:762) INFO: 40epoch:train:3001-3100batch: iter_time=8.354e-05, forward_time=0.314, loss_ctc=45.527, loss_att=44.114, acc=0.781, loss=44.538, backward_time=0.297, grad_norm=39.483, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.597e-04, train_time=1.340 +[gpub010:0/16] 2024-02-10 13:21:24,722 (trainer:762) INFO: 40epoch:train:3101-3200batch: iter_time=7.915e-05, forward_time=0.386, loss_ctc=49.102, loss_att=47.347, acc=0.760, loss=47.873, backward_time=0.340, grad_norm=44.324, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.597e-04, train_time=1.528 +[gpub010:0/16] 2024-02-10 13:23:50,889 (trainer:762) INFO: 40epoch:train:3201-3300batch: iter_time=8.745e-05, forward_time=0.291, loss_ctc=44.650, loss_att=44.041, acc=0.764, loss=44.224, backward_time=0.295, grad_norm=41.612, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.597e-04, train_time=1.461 +[gpub010:0/16] 2024-02-10 13:26:01,543 (trainer:762) INFO: 40epoch:train:3301-3400batch: iter_time=8.466e-05, forward_time=0.290, loss_ctc=43.888, loss_att=39.398, acc=0.766, loss=40.745, backward_time=0.294, grad_norm=39.708, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.597e-04, train_time=1.307 +[gpub010:0/16] 2024-02-10 
13:28:13,243 (trainer:762) INFO: 40epoch:train:3401-3500batch: iter_time=1.630e-04, forward_time=0.317, loss_ctc=53.023, loss_att=52.142, acc=0.746, loss=52.406, backward_time=0.323, grad_norm=58.348, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.597e-04, train_time=1.317 +[gpub010:0/16] 2024-02-10 13:30:35,373 (trainer:762) INFO: 40epoch:train:3501-3600batch: iter_time=8.684e-05, forward_time=0.334, loss_ctc=50.079, loss_att=47.030, acc=0.751, loss=47.945, backward_time=0.306, grad_norm=47.320, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.597e-04, train_time=1.421 +[gpub010:0/16] 2024-02-10 13:32:46,610 (trainer:762) INFO: 40epoch:train:3601-3700batch: iter_time=8.294e-05, forward_time=0.293, loss_ctc=48.389, loss_att=46.672, acc=0.764, loss=47.187, backward_time=0.298, grad_norm=43.480, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.597e-04, train_time=1.311 +[gpub010:0/16] 2024-02-10 13:34:19,725 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-02-10 13:34:38,520 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 13:34:41,885 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 13:34:41,885 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-02-10 13:34:41,942 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 13:44:06,410 (trainer:762) INFO: 40epoch:train:3701-3800batch: iter_time=3.316, forward_time=0.345, loss_ctc=42.132, loss_att=39.308, acc=0.773, loss=40.155, backward_time=0.335, grad_norm=42.612, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.596e-04, train_time=6.799 +[gpub010:0/16] 2024-02-10 13:46:35,787 (trainer:762) INFO: 40epoch:train:3801-3900batch: iter_time=8.472e-05, forward_time=0.292, loss_ctc=46.106, loss_att=46.493, acc=0.759, loss=46.377, backward_time=0.295, grad_norm=44.336, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.596e-04, train_time=1.494 +[gpub010:0/16] 2024-02-10 13:48:36,105 (trainer:762) INFO: 40epoch:train:3901-4000batch: iter_time=8.231e-05, forward_time=0.289, loss_ctc=41.261, loss_att=39.442, acc=0.757, loss=39.988, backward_time=0.294, grad_norm=46.186, clip=100.000, loss_scale=7.788e+33, optim_step_time=0.093, optim0_lr0=1.596e-04, train_time=1.203 +[gpub010:0/16] 2024-02-10 13:50:29,817 (trainer:762) INFO: 
40epoch:train:4001-4100batch: iter_time=8.879e-05, forward_time=0.290, loss_ctc=42.557, loss_att=39.819, acc=0.762, loss=40.640, backward_time=0.295, grad_norm=40.873, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.596e-04, train_time=1.136 +[gpub010:0/16] 2024-02-10 13:52:51,995 (trainer:762) INFO: 40epoch:train:4101-4200batch: iter_time=1.596e-04, forward_time=0.377, loss_ctc=48.177, loss_att=50.963, acc=0.749, loss=50.127, backward_time=0.321, grad_norm=50.511, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.103, optim0_lr0=1.596e-04, train_time=1.422 +[gpub010:0/16] 2024-02-10 13:55:07,393 (trainer:762) INFO: 40epoch:train:4201-4300batch: iter_time=8.163e-05, forward_time=0.291, loss_ctc=52.233, loss_att=46.562, acc=0.752, loss=48.264, backward_time=0.296, grad_norm=46.136, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.596e-04, train_time=1.354 +[gpub010:0/16] 2024-02-10 13:57:25,906 (trainer:762) INFO: 40epoch:train:4301-4400batch: iter_time=8.091e-05, forward_time=0.292, loss_ctc=48.809, loss_att=47.418, acc=0.770, loss=47.835, backward_time=0.295, grad_norm=45.091, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.596e-04, train_time=1.385 +[gpub010:0/16] 2024-02-10 13:59:37,022 (trainer:762) INFO: 40epoch:train:4401-4500batch: iter_time=8.286e-05, forward_time=0.291, loss_ctc=43.374, loss_att=42.115, acc=0.771, loss=42.493, backward_time=0.296, grad_norm=38.498, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.595e-04, train_time=1.311 +[gpub010:0/16] 2024-02-10 14:01:55,072 (trainer:762) INFO: 40epoch:train:4501-4600batch: iter_time=8.672e-05, forward_time=0.394, loss_ctc=43.859, loss_att=39.505, acc=0.772, loss=40.811, backward_time=0.323, grad_norm=39.214, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.595e-04, train_time=1.379 +[gpub010:0/16] 2024-02-10 14:04:06,467 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub010:0/16] 2024-02-10 14:04:09,695 (trainer:762) INFO: 40epoch:train:4601-4700batch: iter_time=8.248e-05, forward_time=0.292, loss_ctc=47.303, loss_att=45.724, acc=0.760, loss=46.197, backward_time=0.296, grad_norm=46.321, clip=100.000, loss_scale=1.023e+34, optim_step_time=0.093, optim0_lr0=1.595e-04, train_time=1.347 +[gpub010:0/16] 2024-02-10 14:06:12,638 (trainer:762) INFO: 40epoch:train:4701-4800batch: iter_time=7.951e-05, forward_time=0.290, loss_ctc=51.608, loss_att=48.284, acc=0.741, loss=49.282, backward_time=0.295, grad_norm=55.462, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.595e-04, train_time=1.229 +[gpub010:0/16] 2024-02-10 14:08:28,734 (trainer:762) INFO: 40epoch:train:4801-4900batch: iter_time=8.004e-05, forward_time=0.293, loss_ctc=51.254, loss_att=50.069, acc=0.756, loss=50.425, backward_time=0.299, grad_norm=46.201, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.595e-04, train_time=1.361 +[gpub010:0/16] 2024-02-10 14:10:47,668 (trainer:762) INFO: 40epoch:train:4901-5000batch: iter_time=3.072e-04, forward_time=0.386, loss_ctc=43.229, loss_att=44.113, acc=0.768, loss=43.848, backward_time=0.318, grad_norm=40.200, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.595e-04, train_time=1.389 +[gpub010:0/16] 2024-02-10 14:11:07,735 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-02-10 14:11:26,345 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 14:11:29,757 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 14:11:29,757 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-02-10 14:11:29,785 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 14:18:29,852 (trainer:762) INFO: 40epoch:train:5001-5100batch: iter_time=3.351, forward_time=0.290, loss_ctc=44.477, loss_att=38.680, acc=0.785, loss=40.419, backward_time=0.296, grad_norm=41.907, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.595e-04, train_time=4.621 +[gpub010:0/16] 2024-02-10 14:20:41,022 (trainer:762) INFO: 40epoch:train:5101-5200batch: iter_time=8.095e-05, forward_time=0.291, loss_ctc=42.272, loss_att=46.753, acc=0.755, loss=45.409, backward_time=0.295, grad_norm=48.111, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.312 +[gpub010:0/16] 2024-02-10 14:23:03,568 (trainer:762) INFO: 40epoch:train:5201-5300batch: iter_time=2.481e-04, forward_time=0.350, loss_ctc=39.160, loss_att=34.808, acc=0.789, loss=36.114, backward_time=0.316, grad_norm=38.199, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.594e-04, train_time=1.425 +[gpub010:0/16] 2024-02-10 14:25:09,635 (trainer:762) INFO: 
40epoch:train:5301-5400batch: iter_time=8.953e-05, forward_time=0.292, loss_ctc=52.434, loss_att=53.356, acc=0.745, loss=53.079, backward_time=0.297, grad_norm=54.717, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.260 +[gpub010:0/16] 2024-02-10 14:27:16,433 (trainer:762) INFO: 40epoch:train:5401-5500batch: iter_time=8.393e-05, forward_time=0.292, loss_ctc=47.941, loss_att=48.895, acc=0.749, loss=48.609, backward_time=0.295, grad_norm=46.488, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.268 +[gpub010:0/16] 2024-02-10 14:29:30,518 (trainer:762) INFO: 40epoch:train:5501-5600batch: iter_time=8.202e-05, forward_time=0.363, loss_ctc=44.999, loss_att=43.800, acc=0.791, loss=44.160, backward_time=0.323, grad_norm=38.399, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.594e-04, train_time=1.341 +[gpub010:0/16] 2024-02-10 14:31:55,157 (trainer:762) INFO: 40epoch:train:5601-5700batch: iter_time=8.560e-05, forward_time=0.292, loss_ctc=48.878, loss_att=48.577, acc=0.767, loss=48.667, backward_time=0.295, grad_norm=47.269, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.594e-04, train_time=1.446 +[gpub010:0/16] 2024-02-10 14:33:55,703 (trainer:762) INFO: 40epoch:train:5701-5800batch: iter_time=8.530e-05, forward_time=0.292, loss_ctc=44.221, loss_att=44.004, acc=0.769, loss=44.069, backward_time=0.297, grad_norm=39.815, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.594e-04, train_time=1.205 +[gpub010:0/16] 2024-02-10 14:36:18,145 (trainer:762) INFO: 40epoch:train:5801-5900batch: iter_time=2.072e-04, forward_time=0.377, loss_ctc=43.578, loss_att=40.294, acc=0.775, loss=41.279, backward_time=0.343, grad_norm=39.532, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.594e-04, train_time=1.408 +[gpub010:0/16] 2024-02-10 14:40:23,959 (trainer:762) INFO: 40epoch:train:5901-6000batch: 
iter_time=0.145, forward_time=0.318, loss_ctc=52.603, loss_att=52.393, acc=0.748, loss=52.456, backward_time=0.305, grad_norm=60.238, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.593e-04, train_time=2.463 +[gpub010:0/16] 2024-02-10 14:42:54,148 (trainer:762) INFO: 40epoch:train:6001-6100batch: iter_time=0.063, forward_time=0.292, loss_ctc=50.086, loss_att=48.203, acc=0.762, loss=48.768, backward_time=0.296, grad_norm=45.856, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.593e-04, train_time=1.513 +[gpub010:0/16] 2024-02-10 14:45:11,280 (trainer:762) INFO: 40epoch:train:6101-6200batch: iter_time=7.809e-05, forward_time=0.319, loss_ctc=47.861, loss_att=46.481, acc=0.766, loss=46.895, backward_time=0.299, grad_norm=40.735, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.593e-04, train_time=1.371 +[gpub010:0/16] 2024-02-10 14:46:37,576 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub010:0/16] 2024-02-10 14:46:56,122 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 14:46:59,951 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 14:46:59,951 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-02-10 14:47:00,021 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 14:53:00,994 
(trainer:762) INFO: 40epoch:train:6201-6300batch: iter_time=3.342, forward_time=0.360, loss_ctc=41.527, loss_att=39.439, acc=0.781, loss=40.065, backward_time=0.317, grad_norm=40.996, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.593e-04, train_time=4.697 +[gpub010:0/16] 2024-02-10 14:55:03,522 (trainer:762) INFO: 40epoch:train:6301-6400batch: iter_time=8.387e-05, forward_time=0.292, loss_ctc=45.072, loss_att=45.905, acc=0.777, loss=45.655, backward_time=0.299, grad_norm=42.357, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.593e-04, train_time=1.225 +[gpub010:0/16] 2024-02-10 14:57:05,966 (trainer:762) INFO: 40epoch:train:6401-6500batch: iter_time=8.277e-05, forward_time=0.290, loss_ctc=40.915, loss_att=40.981, acc=0.763, loss=40.961, backward_time=0.294, grad_norm=46.945, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.593e-04, train_time=1.224 +[gpub010:0/16] 2024-02-10 14:59:18,285 (trainer:762) INFO: 40epoch:train:6501-6600batch: iter_time=8.679e-05, forward_time=0.381, loss_ctc=42.181, loss_att=39.467, acc=0.770, loss=40.281, backward_time=0.322, grad_norm=41.552, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.593e-04, train_time=1.323 +[gpub010:0/16] 2024-02-10 15:01:21,439 (trainer:762) INFO: 40epoch:train:6601-6700batch: iter_time=9.148e-05, forward_time=0.290, loss_ctc=47.045, loss_att=52.893, acc=0.754, loss=51.139, backward_time=0.296, grad_norm=48.877, clip=100.000, loss_scale=5.348e+33, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.231 +[gpub010:0/16] 2024-02-10 15:03:20,121 (trainer:762) INFO: 40epoch:train:6701-6800batch: iter_time=8.964e-05, forward_time=0.292, loss_ctc=51.554, loss_att=46.123, acc=0.761, loss=47.752, backward_time=0.297, grad_norm=47.487, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.187 +[gpub010:0/16] 2024-02-10 15:05:16,244 (trainer:762) INFO: 
40epoch:train:6801-6900batch: iter_time=8.359e-05, forward_time=0.334, loss_ctc=48.164, loss_att=46.949, acc=0.778, loss=47.313, backward_time=0.322, grad_norm=47.786, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.592e-04, train_time=1.161 +[gpub010:0/16] 2024-02-10 15:07:33,325 (trainer:762) INFO: 40epoch:train:6901-7000batch: iter_time=8.453e-05, forward_time=0.320, loss_ctc=43.582, loss_att=43.974, acc=0.775, loss=43.857, backward_time=0.301, grad_norm=38.080, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.370 +[gpub010:0/16] 2024-02-10 15:09:34,843 (trainer:762) INFO: 40epoch:train:7001-7100batch: iter_time=8.269e-05, forward_time=0.314, loss_ctc=43.814, loss_att=40.363, acc=0.777, loss=41.398, backward_time=0.297, grad_norm=40.037, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.215 +[gpub010:0/16] 2024-02-10 15:11:29,647 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-10 15:11:30,846 (trainer:762) INFO: 40epoch:train:7101-7200batch: iter_time=3.964e-04, forward_time=0.327, loss_ctc=46.801, loss_att=46.053, acc=0.766, loss=46.278, backward_time=0.328, grad_norm=41.385, clip=100.000, loss_scale=1.033e+34, optim_step_time=0.098, optim0_lr0=1.592e-04, train_time=1.160 +[gpub010:0/16] 2024-02-10 15:14:01,321 (trainer:762) INFO: 40epoch:train:7201-7300batch: iter_time=9.659e-05, forward_time=0.319, loss_ctc=50.113, loss_att=49.713, acc=0.751, loss=49.833, backward_time=0.305, grad_norm=57.163, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.592e-04, train_time=1.505 +[gpub010:0/16] 2024-02-10 15:15:57,965 (trainer:762) INFO: 40epoch:train:7301-7400batch: iter_time=9.279e-05, forward_time=0.296, loss_ctc=50.810, loss_att=50.470, acc=0.762, loss=50.572, backward_time=0.302, grad_norm=46.395, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.592e-04, train_time=1.166 +[gpub010:0/16] 2024-02-10 15:18:08,372 (trainer:762) INFO: 40epoch:train:7401-7500batch: iter_time=8.715e-05, forward_time=0.297, loss_ctc=42.877, loss_att=44.479, acc=0.771, loss=43.999, backward_time=0.300, grad_norm=41.705, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.591e-04, train_time=1.304 +[gpub010:0/16] 2024-02-10 15:18:28,430 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub010:0/16] 2024-02-10 15:18:47,367 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 15:18:50,923 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 15:18:50,923 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-02-10 15:18:50,962 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 15:27:14,599 (trainer:762) INFO: 40epoch:train:7501-7600batch: iter_time=3.722, forward_time=0.382, loss_ctc=44.289, loss_att=36.895, acc=0.792, loss=39.113, backward_time=0.332, grad_norm=39.732, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.591e-04, train_time=5.462 +[gpub010:0/16] 2024-02-10 15:29:03,248 (trainer:762) INFO: 40epoch:train:7601-7700batch: iter_time=8.307e-05, forward_time=0.292, loss_ctc=42.205, loss_att=44.701, acc=0.763, loss=43.952, backward_time=0.300, grad_norm=48.562, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.591e-04, train_time=1.086 +[gpub010:0/16] 2024-02-10 15:31:47,986 (trainer:762) INFO: 40epoch:train:7701-7800batch: iter_time=0.090, forward_time=0.437, loss_ctc=38.840, loss_att=34.142, acc=0.791, loss=35.551, backward_time=0.319, grad_norm=36.524, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.108, optim0_lr0=1.591e-04, train_time=1.647 +[gpub010:0/16] 2024-02-10 15:34:23,380 (trainer:762) INFO: 
40epoch:train:7801-7900batch: iter_time=0.057, forward_time=0.379, loss_ctc=52.483, loss_att=53.233, acc=0.747, loss=53.008, backward_time=0.327, grad_norm=52.529, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.591e-04, train_time=1.554 +[gpub010:0/16] 2024-02-10 15:37:03,248 (trainer:762) INFO: 40epoch:train:7901-8000batch: iter_time=0.170, forward_time=0.343, loss_ctc=47.881, loss_att=49.544, acc=0.747, loss=49.045, backward_time=0.304, grad_norm=46.938, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.591e-04, train_time=1.598 +[gpub010:0/16] 2024-02-10 15:40:03,127 (trainer:762) INFO: 40epoch:train:8001-8100batch: iter_time=8.422e-05, forward_time=0.469, loss_ctc=44.988, loss_att=43.473, acc=0.791, loss=43.928, backward_time=0.456, grad_norm=38.178, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.591e-04, train_time=1.799 +[gpub010:0/16] 2024-02-10 15:42:25,163 (trainer:762) INFO: 40epoch:train:8101-8200batch: iter_time=8.845e-05, forward_time=0.319, loss_ctc=48.932, loss_att=48.941, acc=0.766, loss=48.938, backward_time=0.305, grad_norm=43.673, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.590e-04, train_time=1.420 +[gpub010:0/16] 2024-02-10 15:44:22,517 (trainer:762) INFO: 40epoch:train:8201-8300batch: iter_time=8.670e-05, forward_time=0.329, loss_ctc=43.758, loss_att=43.289, acc=0.772, loss=43.430, backward_time=0.321, grad_norm=40.936, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.590e-04, train_time=1.173 +[gpub010:0/16] 2024-02-10 15:46:28,002 (trainer:762) INFO: 40epoch:train:8301-8400batch: iter_time=8.886e-05, forward_time=0.336, loss_ctc=43.223, loss_att=39.455, acc=0.777, loss=40.585, backward_time=0.305, grad_norm=39.595, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.590e-04, train_time=1.255 +[gpub010:0/16] 2024-02-10 15:49:24,549 (trainer:762) INFO: 40epoch:train:8401-8500batch: iter_time=2.120e-04, 
forward_time=0.363, loss_ctc=53.401, loss_att=52.658, acc=0.749, loss=52.881, backward_time=0.354, grad_norm=60.254, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.590e-04, train_time=1.764 +[gpub010:0/16] 2024-02-10 15:52:17,508 (trainer:762) INFO: 40epoch:train:8501-8600batch: iter_time=0.316, forward_time=0.301, loss_ctc=49.646, loss_att=47.276, acc=0.766, loss=47.987, backward_time=0.300, grad_norm=46.742, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.590e-04, train_time=1.730 +[gpub010:0/16] 2024-02-10 15:54:32,854 (trainer:762) INFO: 40epoch:train:8601-8700batch: iter_time=3.897e-04, forward_time=0.401, loss_ctc=47.556, loss_att=46.210, acc=0.768, loss=46.614, backward_time=0.333, grad_norm=41.976, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.590e-04, train_time=1.352 +[gpub010:0/16] 2024-02-10 15:55:51,818 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub010:0/16] 2024-02-10 15:56:11,111 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 15:56:14,657 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 15:56:14,657 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-02-10 15:56:14,828 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 16:05:47,719 (trainer:762) INFO: 
40epoch:train:8701-8800batch: iter_time=4.150, forward_time=0.358, loss_ctc=41.398, loss_att=39.924, acc=0.779, loss=40.366, backward_time=0.316, grad_norm=40.450, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.590e-04, train_time=6.749 +[gpub010:0/16] 2024-02-10 16:08:18,402 (trainer:762) INFO: 40epoch:train:8801-8900batch: iter_time=1.828e-04, forward_time=0.395, loss_ctc=45.110, loss_att=48.132, acc=0.758, loss=47.226, backward_time=0.340, grad_norm=44.905, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.590e-04, train_time=1.506 +[gpub010:0/16] 2024-02-10 16:10:47,719 (trainer:762) INFO: 40epoch:train:8901-9000batch: iter_time=7.883e-05, forward_time=0.357, loss_ctc=40.597, loss_att=39.638, acc=0.758, loss=39.926, backward_time=0.329, grad_norm=46.671, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.589e-04, train_time=1.492 +[gpub010:0/16] 2024-02-10 16:14:02,731 (trainer:762) INFO: 40epoch:train:9001-9100batch: iter_time=0.178, forward_time=0.487, loss_ctc=41.837, loss_att=39.321, acc=0.766, loss=40.076, backward_time=0.332, grad_norm=40.151, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.589e-04, train_time=1.950 +[gpub010:0/16] 2024-02-10 16:16:38,442 (trainer:762) INFO: 40epoch:train:9101-9200batch: iter_time=0.001, forward_time=0.349, loss_ctc=47.099, loss_att=51.183, acc=0.750, loss=49.957, backward_time=0.383, grad_norm=49.672, clip=100.000, loss_scale=5.244e+33, optim_step_time=0.099, optim0_lr0=1.589e-04, train_time=1.556 +[gpub010:0/16] 2024-02-10 16:19:15,352 (trainer:762) INFO: 40epoch:train:9201-9300batch: iter_time=3.146e-04, forward_time=0.422, loss_ctc=51.649, loss_att=46.330, acc=0.754, loss=47.926, backward_time=0.349, grad_norm=45.224, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.105, optim0_lr0=1.589e-04, train_time=1.569 +[gpub010:0/16] 2024-02-10 16:20:17,978 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-10 16:21:56,755 (trainer:762) INFO: 40epoch:train:9301-9400batch: iter_time=2.430e-04, forward_time=0.417, loss_ctc=48.893, loss_att=47.952, acc=0.771, loss=48.234, backward_time=0.370, grad_norm=46.859, clip=100.000, loss_scale=7.500e+33, optim_step_time=0.105, optim0_lr0=1.589e-04, train_time=1.614 +[gpub010:0/16] 2024-02-10 16:24:14,382 (trainer:762) INFO: 40epoch:train:9401-9500batch: iter_time=4.430e-04, forward_time=0.362, loss_ctc=43.546, loss_att=42.473, acc=0.771, loss=42.795, backward_time=0.348, grad_norm=37.420, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.589e-04, train_time=1.376 +[gpub010:0/16] 2024-02-10 16:26:27,585 (trainer:762) INFO: 40epoch:train:9501-9600batch: iter_time=1.857e-04, forward_time=0.364, loss_ctc=43.165, loss_att=39.572, acc=0.775, loss=40.650, backward_time=0.334, grad_norm=39.798, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.589e-04, train_time=1.331 +[gpub010:0/16] 2024-02-10 16:28:56,087 (trainer:762) INFO: 40epoch:train:9601-9700batch: iter_time=2.666e-04, forward_time=0.362, loss_ctc=46.256, loss_att=45.383, acc=0.762, loss=45.645, backward_time=0.331, grad_norm=43.110, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.588e-04, train_time=1.486 +[gpub010:0/16] 2024-02-10 16:31:04,930 (trainer:762) INFO: 40epoch:train:9701-9800batch: iter_time=0.001, forward_time=0.380, loss_ctc=49.120, loss_att=48.999, acc=0.742, loss=49.035, backward_time=0.347, grad_norm=60.857, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.588e-04, train_time=1.287 +[gpub010:0/16] 2024-02-10 16:33:39,061 (trainer:762) INFO: 40epoch:train:9801-9900batch: iter_time=9.320e-05, forward_time=0.446, loss_ctc=50.884, loss_att=50.389, acc=0.756, loss=50.537, backward_time=0.392, grad_norm=44.412, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.588e-04, train_time=1.542 +[gpub010:0/16] 2024-02-10 16:36:09,527 
(trainer:762) INFO: 40epoch:train:9901-10000batch: iter_time=1.754e-04, forward_time=0.353, loss_ctc=42.773, loss_att=43.924, acc=0.770, loss=43.579, backward_time=0.338, grad_norm=42.171, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.588e-04, train_time=1.505 +[gpub010:0/16] 2024-02-10 16:36:29,786 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub010:0/16] 2024-02-10 16:36:48,847 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 16:36:52,360 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 16:36:52,360 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-02-10 16:36:52,402 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 16:47:20,178 (trainer:762) INFO: 40epoch:train:10001-10100batch: iter_time=5.478, forward_time=0.370, loss_ctc=44.174, loss_att=37.635, acc=0.790, loss=39.596, backward_time=0.313, grad_norm=40.459, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.588e-04, train_time=6.707 +[gpub010:0/16] 2024-02-10 16:50:00,581 (trainer:762) INFO: 40epoch:train:10101-10200batch: iter_time=9.127e-05, forward_time=0.472, loss_ctc=41.909, loss_att=44.989, acc=0.763, loss=44.065, backward_time=0.351, grad_norm=48.625, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.588e-04, 
train_time=1.603 +[gpub010:0/16] 2024-02-10 16:52:20,195 (trainer:762) INFO: 40epoch:train:10201-10300batch: iter_time=3.442e-04, forward_time=0.343, loss_ctc=38.641, loss_att=34.295, acc=0.790, loss=35.599, backward_time=0.331, grad_norm=36.113, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.588e-04, train_time=1.396 +[gpub010:0/16] 2024-02-10 16:54:27,011 (trainer:762) INFO: 40epoch:train:10301-10400batch: iter_time=2.526e-04, forward_time=0.315, loss_ctc=51.655, loss_att=52.621, acc=0.749, loss=52.331, backward_time=0.298, grad_norm=52.480, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.587e-04, train_time=1.268 +[gpub010:0/16] 2024-02-10 16:56:56,648 (trainer:762) INFO: 40epoch:train:10401-10500batch: iter_time=8.835e-05, forward_time=0.361, loss_ctc=47.462, loss_att=49.427, acc=0.748, loss=48.837, backward_time=0.325, grad_norm=46.564, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.587e-04, train_time=1.496 +[gpub010:0/16] 2024-02-10 16:59:15,198 (trainer:762) INFO: 40epoch:train:10501-10600batch: iter_time=2.003e-04, forward_time=0.366, loss_ctc=45.535, loss_att=43.643, acc=0.793, loss=44.211, backward_time=0.341, grad_norm=37.636, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.587e-04, train_time=1.385 +[gpub010:0/16] 2024-02-10 17:01:36,964 (trainer:762) INFO: 40epoch:train:10601-10700batch: iter_time=4.399e-04, forward_time=0.386, loss_ctc=48.143, loss_att=48.503, acc=0.767, loss=48.395, backward_time=0.343, grad_norm=45.643, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.587e-04, train_time=1.417 +[gpub010:0/16] 2024-02-10 17:03:58,965 (trainer:762) INFO: 40epoch:train:10701-10800batch: iter_time=3.182e-04, forward_time=0.356, loss_ctc=43.553, loss_att=42.817, acc=0.774, loss=43.038, backward_time=0.345, grad_norm=40.799, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.587e-04, train_time=1.420 
+[gpub010:0/16] 2024-02-10 17:06:19,688 (trainer:762) INFO: 40epoch:train:10801-10900batch: iter_time=4.629e-04, forward_time=0.375, loss_ctc=42.799, loss_att=39.343, acc=0.777, loss=40.380, backward_time=0.354, grad_norm=38.152, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.587e-04, train_time=1.406 +[gpub010:0/16] 2024-02-10 17:08:42,205 (trainer:762) INFO: 40epoch:train:10901-11000batch: iter_time=4.537e-04, forward_time=0.383, loss_ctc=52.013, loss_att=54.351, acc=0.750, loss=53.650, backward_time=0.338, grad_norm=55.706, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.587e-04, train_time=1.426 +[gpub010:0/16] 2024-02-10 17:11:03,910 (trainer:762) INFO: 40epoch:train:11001-11100batch: iter_time=1.475e-04, forward_time=0.395, loss_ctc=49.129, loss_att=47.418, acc=0.765, loss=47.931, backward_time=0.357, grad_norm=47.058, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.587e-04, train_time=1.416 +[gpub010:0/16] 2024-02-10 17:13:19,822 (trainer:762) INFO: 40epoch:train:11101-11200batch: iter_time=1.864e-04, forward_time=0.410, loss_ctc=47.824, loss_att=47.038, acc=0.766, loss=47.274, backward_time=0.321, grad_norm=43.124, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.586e-04, train_time=1.360 +[gpub010:0/16] 2024-02-10 17:14:46,077 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-02-10 17:15:05,618 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 17:15:09,443 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 17:15:09,443 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-02-10 17:15:09,446 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 17:22:11,986 (trainer:762) INFO: 40epoch:train:11201-11300batch: iter_time=3.258, forward_time=0.368, loss_ctc=41.236, loss_att=39.453, acc=0.778, loss=39.988, backward_time=0.312, grad_norm=41.067, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.111, optim0_lr0=1.586e-04, train_time=5.321 +[gpub010:0/16] 2024-02-10 17:26:24,944 (trainer:762) INFO: 40epoch:train:11301-11400batch: iter_time=8.216e-05, forward_time=0.311, loss_ctc=45.011, loss_att=47.261, acc=0.761, loss=46.586, backward_time=0.302, grad_norm=43.842, clip=100.000, loss_scale=8.048e+33, optim_step_time=0.094, optim0_lr0=1.586e-04, train_time=2.529 +[gpub010:0/16] 2024-02-10 17:28:33,376 (trainer:762) INFO: 40epoch:train:11401-11500batch: iter_time=8.034e-05, forward_time=0.335, loss_ctc=40.544, loss_att=39.970, acc=0.759, loss=40.142, backward_time=0.340, grad_norm=46.728, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.586e-04, train_time=1.284 +[gpub010:0/16] 2024-02-10 17:30:42,433 
(trainer:762) INFO: 40epoch:train:11501-11600batch: iter_time=7.929e-05, forward_time=0.308, loss_ctc=41.782, loss_att=39.182, acc=0.766, loss=39.962, backward_time=0.300, grad_norm=40.028, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.586e-04, train_time=1.290 +[gpub010:0/16] 2024-02-10 17:31:36,459 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-10 17:32:32,172 (trainer:762) INFO: 40epoch:train:11601-11700batch: iter_time=8.085e-05, forward_time=0.295, loss_ctc=46.717, loss_att=51.090, acc=0.752, loss=49.778, backward_time=0.304, grad_norm=48.991, clip=100.000, loss_scale=7.762e+33, optim_step_time=0.095, optim0_lr0=1.586e-04, train_time=1.097 +[gpub010:0/16] 2024-02-10 17:34:53,722 (trainer:762) INFO: 40epoch:train:11701-11800batch: iter_time=8.168e-05, forward_time=0.375, loss_ctc=51.427, loss_att=45.907, acc=0.756, loss=47.563, backward_time=0.352, grad_norm=46.620, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.586e-04, train_time=1.415 +[gpub010:0/16] 2024-02-10 17:36:59,367 (trainer:762) INFO: 40epoch:train:11801-11900batch: iter_time=8.592e-05, forward_time=0.293, loss_ctc=48.126, loss_att=47.795, acc=0.771, loss=47.894, backward_time=0.296, grad_norm=46.817, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.586e-04, train_time=1.256 +[gpub010:0/16] 2024-02-10 17:39:02,882 (trainer:762) INFO: 40epoch:train:11901-12000batch: iter_time=8.024e-05, forward_time=0.399, loss_ctc=42.975, loss_att=42.424, acc=0.771, loss=42.589, backward_time=0.324, grad_norm=37.446, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.585e-04, train_time=1.235 +[gpub010:0/16] 2024-02-10 17:41:13,960 (trainer:762) INFO: 40epoch:train:12001-12100batch: iter_time=9.051e-05, forward_time=0.299, loss_ctc=43.395, loss_att=39.529, acc=0.775, loss=40.688, backward_time=0.297, grad_norm=40.735, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.093, optim0_lr0=1.585e-04, train_time=1.310 +[gpub010:0/16] 2024-02-10 17:43:37,332 (trainer:762) INFO: 40epoch:train:12101-12200batch: iter_time=5.464e-04, forward_time=0.347, loss_ctc=46.647, loss_att=45.426, acc=0.763, loss=45.792, backward_time=0.327, grad_norm=43.134, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.585e-04, train_time=1.434 +[gpub010:0/16] 2024-02-10 17:45:31,038 (trainer:762) INFO: 40epoch:train:12201-12300batch: iter_time=8.838e-05, forward_time=0.337, loss_ctc=49.065, loss_att=47.987, acc=0.743, loss=48.310, backward_time=0.309, grad_norm=57.716, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.585e-04, train_time=1.137 +[gpub010:0/16] 2024-02-10 17:47:31,297 (trainer:762) INFO: 40epoch:train:12301-12400batch: iter_time=8.019e-05, forward_time=0.320, loss_ctc=50.300, loss_att=50.391, acc=0.756, loss=50.364, backward_time=0.301, grad_norm=45.438, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.585e-04, train_time=1.202 +[gpub010:0/16] 2024-02-10 17:50:01,713 (trainer:762) INFO: 40epoch:train:12401-12500batch: iter_time=8.485e-05, forward_time=0.390, loss_ctc=42.557, loss_att=43.797, acc=0.770, loss=43.425, backward_time=0.334, grad_norm=41.418, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.585e-04, train_time=1.504 +[gpub010:0/16] 2024-02-10 17:50:21,742 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-02-10 17:50:41,076 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 17:50:44,617 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 17:50:44,617 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-02-10 17:50:44,620 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 17:56:47,771 (trainer:762) INFO: 40epoch:train:12501-12600batch: iter_time=2.877, forward_time=0.289, loss_ctc=43.635, loss_att=37.420, acc=0.790, loss=39.284, backward_time=0.295, grad_norm=39.746, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.585e-04, train_time=4.060 +[gpub010:0/16] 2024-02-10 17:59:18,390 (trainer:762) INFO: 40epoch:train:12601-12700batch: iter_time=8.207e-05, forward_time=0.375, loss_ctc=41.666, loss_att=44.835, acc=0.763, loss=43.884, backward_time=0.321, grad_norm=48.591, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.584e-04, train_time=1.506 +[gpub010:0/16] 2024-02-10 18:01:14,110 (trainer:762) INFO: 40epoch:train:12701-12800batch: iter_time=8.467e-05, forward_time=0.307, loss_ctc=38.797, loss_att=34.183, acc=0.791, loss=35.567, backward_time=0.296, grad_norm=36.920, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.584e-04, train_time=1.157 +[gpub010:0/16] 2024-02-10 18:03:15,388 (trainer:762) 
INFO: 40epoch:train:12801-12900batch: iter_time=8.604e-05, forward_time=0.294, loss_ctc=51.567, loss_att=52.139, acc=0.750, loss=51.967, backward_time=0.300, grad_norm=51.074, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.584e-04, train_time=1.213 +[gpub010:0/16] 2024-02-10 18:05:39,460 (trainer:762) INFO: 40epoch:train:12901-13000batch: iter_time=8.697e-05, forward_time=0.354, loss_ctc=47.101, loss_att=49.117, acc=0.749, loss=48.512, backward_time=0.336, grad_norm=56.479, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.584e-04, train_time=1.440 +[gpub010:0/16] 2024-02-10 18:07:34,188 (trainer:762) INFO: 40epoch:train:13001-13100batch: iter_time=8.404e-05, forward_time=0.295, loss_ctc=44.802, loss_att=43.618, acc=0.792, loss=43.973, backward_time=0.300, grad_norm=39.404, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.584e-04, train_time=1.147 +[gpub010:0/16] 2024-02-10 18:10:12,587 (trainer:762) INFO: 40epoch:train:13101-13200batch: iter_time=8.857e-05, forward_time=0.378, loss_ctc=47.706, loss_att=48.642, acc=0.766, loss=48.362, backward_time=0.346, grad_norm=48.464, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.584e-04, train_time=1.584 +[gpub010:0/16] 2024-02-10 18:12:10,548 (trainer:762) INFO: 40epoch:train:13201-13300batch: iter_time=8.403e-05, forward_time=0.315, loss_ctc=43.591, loss_att=42.964, acc=0.774, loss=43.153, backward_time=0.300, grad_norm=39.737, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.584e-04, train_time=1.179 +[gpub010:0/16] 2024-02-10 18:14:32,783 (trainer:762) INFO: 40epoch:train:13301-13400batch: iter_time=8.826e-05, forward_time=0.292, loss_ctc=42.999, loss_att=39.433, acc=0.778, loss=40.503, backward_time=0.297, grad_norm=39.671, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.584e-04, train_time=1.422 +[gpub010:0/16] 2024-02-10 18:16:39,282 (trainer:762) INFO: 
40epoch:train:13401-13500batch: iter_time=3.330e-04, forward_time=0.366, loss_ctc=51.891, loss_att=53.001, acc=0.751, loss=52.668, backward_time=0.328, grad_norm=63.029, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.583e-04, train_time=1.265 +[gpub010:0/16] 2024-02-10 18:18:53,545 (trainer:762) INFO: 40epoch:train:13501-13600batch: iter_time=8.326e-05, forward_time=0.293, loss_ctc=49.132, loss_att=47.299, acc=0.765, loss=47.849, backward_time=0.297, grad_norm=45.839, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.583e-04, train_time=1.342 +[gpub010:0/16] 2024-02-10 18:21:15,943 (trainer:762) INFO: 40epoch:train:13601-13700batch: iter_time=4.994e-04, forward_time=0.400, loss_ctc=47.607, loss_att=46.541, acc=0.768, loss=46.861, backward_time=0.334, grad_norm=43.434, clip=100.000, loss_scale=7.788e+33, optim_step_time=0.098, optim0_lr0=1.583e-04, train_time=1.423 +[gpub010:0/16] 2024-02-10 18:22:35,107 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-02-10 18:22:54,451 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 18:22:58,086 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 18:22:58,086 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-02-10 18:22:58,122 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 18:29:10,374 (trainer:762) INFO: 40epoch:train:13701-13800batch: iter_time=3.422, forward_time=0.308, loss_ctc=40.626, loss_att=39.595, acc=0.779, loss=39.904, backward_time=0.297, grad_norm=39.814, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.583e-04, train_time=4.745 +[gpub010:0/16] 2024-02-10 18:30:29,617 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-10 18:32:46,824 (trainer:762) INFO: 40epoch:train:13801-13900batch: iter_time=0.064, forward_time=0.292, loss_ctc=44.671, loss_att=46.689, acc=0.762, loss=46.083, backward_time=0.297, grad_norm=42.815, clip=100.000, loss_scale=7.395e+33, optim_step_time=0.093, optim0_lr0=1.583e-04, train_time=2.164 +[gpub010:0/16] 2024-02-10 18:35:07,666 (trainer:762) INFO: 40epoch:train:13901-14000batch: iter_time=8.613e-05, forward_time=0.358, loss_ctc=40.085, loss_att=38.978, acc=0.763, loss=39.310, backward_time=0.312, grad_norm=45.906, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.583e-04, train_time=1.408 +[gpub010:0/16] 2024-02-10 18:37:15,365 (trainer:762) INFO: 40epoch:train:14001-14100batch: iter_time=8.618e-05, forward_time=0.338, loss_ctc=41.667, loss_att=39.135, acc=0.766, loss=39.894, backward_time=0.302, grad_norm=41.043, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.583e-04, train_time=1.277 +[gpub010:0/16] 2024-02-10 18:39:09,220 (trainer:762) INFO: 40epoch:train:14101-14200batch: iter_time=8.847e-05, forward_time=0.295, loss_ctc=46.007, loss_att=50.889, acc=0.753, loss=49.425, backward_time=0.297, grad_norm=50.820, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.582e-04, train_time=1.138 +[gpub010:0/16] 2024-02-10 18:41:24,490 (trainer:762) INFO: 40epoch:train:14201-14300batch: iter_time=8.274e-05, forward_time=0.293, loss_ctc=50.698, loss_att=45.894, acc=0.757, loss=47.335, backward_time=0.296, grad_norm=46.856, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.582e-04, train_time=1.352 +[gpub010:0/16] 2024-02-10 18:43:59,082 (trainer:762) INFO: 40epoch:train:14301-14400batch: iter_time=0.007, forward_time=0.400, loss_ctc=48.065, loss_att=47.591, acc=0.771, loss=47.733, backward_time=0.323, grad_norm=47.556, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.582e-04, train_time=1.544 +[gpub010:0/16] 2024-02-10 18:46:14,024 
(trainer:762) INFO: 40epoch:train:14401-14500batch: iter_time=8.048e-05, forward_time=0.294, loss_ctc=43.216, loss_att=42.211, acc=0.771, loss=42.513, backward_time=0.296, grad_norm=38.746, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.582e-04, train_time=1.351 +[gpub010:0/16] 2024-02-10 18:48:20,487 (trainer:762) INFO: 40epoch:train:14501-14600batch: iter_time=8.111e-05, forward_time=0.294, loss_ctc=43.108, loss_att=39.101, acc=0.776, loss=40.303, backward_time=0.296, grad_norm=39.622, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.582e-04, train_time=1.264 +[gpub010:0/16] 2024-02-10 18:50:21,813 (trainer:762) INFO: 40epoch:train:14601-14700batch: iter_time=8.259e-05, forward_time=0.323, loss_ctc=46.674, loss_att=45.844, acc=0.762, loss=46.093, backward_time=0.324, grad_norm=43.768, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.582e-04, train_time=1.213 +[gpub010:0/16] 2024-02-10 18:52:26,107 (trainer:762) INFO: 40epoch:train:14701-14800batch: iter_time=7.731e-05, forward_time=0.301, loss_ctc=48.759, loss_att=48.111, acc=0.743, loss=48.305, backward_time=0.301, grad_norm=62.194, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.582e-04, train_time=1.242 +[gpub010:0/16] 2024-02-10 18:54:28,600 (trainer:762) INFO: 40epoch:train:14801-14900batch: iter_time=7.851e-05, forward_time=0.296, loss_ctc=50.443, loss_att=50.178, acc=0.757, loss=50.257, backward_time=0.299, grad_norm=44.345, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.582e-04, train_time=1.226 +[gpub010:0/16] 2024-02-10 18:56:37,751 (trainer:762) INFO: 40epoch:train:14901-15000batch: iter_time=7.856e-05, forward_time=0.291, loss_ctc=42.289, loss_att=43.620, acc=0.771, loss=43.221, backward_time=0.295, grad_norm=41.412, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.581e-04, train_time=1.291 +[gpub010:0/16] 2024-02-10 19:34:17,972 (trainer:361) INFO: 40epoch 
results: [train] iter_time=0.280, forward_time=0.337, loss_ctc=46.283, loss_att=45.121, acc=0.764, loss=45.470, backward_time=0.316, grad_norm=45.290, clip=100.000, loss_scale=6.388e+33, optim_step_time=0.096, optim0_lr0=1.591e-04, train_time=1.696, time=7 hours, 4 minutes and 24.05 seconds, total_count=630000, gpu_max_cached_mem_GB=42.092, [valid] loss_ctc=34.803, cer_ctc=0.181, loss_att=37.461, acc=0.688, cer=0.342, wer=0.990, loss=36.663, time=37 minutes and 16.1 seconds, total_count=196182, gpu_max_cached_mem_GB=42.092 +[gpub010:0/16] 2024-02-10 19:34:36,327 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-02-10 19:34:36,445 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/35epoch.pth +[gpub010:0/16] 2024-02-10 19:34:36,445 (trainer:290) INFO: 41/45epoch started. Estimated time to finish: 1 day, 14 hours and 29 minutes +[gpub010:0/16] 2024-02-10 19:34:36,456 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-02-10 19:34:54,761 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 19:34:58,140 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 19:34:58,140 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-02-10 19:34:58,144 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 19:41:04,647 (trainer:762) INFO: 41epoch:train:1-100batch: iter_time=2.688, forward_time=0.298, loss_ctc=46.593, loss_att=45.969, acc=0.743, loss=46.156, backward_time=0.303, grad_norm=43.988, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.581e-04, train_time=3.882 +[gpub010:0/16] 2024-02-10 19:43:07,382 (trainer:762) INFO: 41epoch:train:101-200batch: iter_time=3.777e-04, forward_time=0.338, loss_ctc=51.338, loss_att=47.456, acc=0.751, loss=48.620, backward_time=0.339, grad_norm=46.507, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.581e-04, train_time=1.227 +[gpub010:0/16] 2024-02-10 19:45:20,988 (trainer:762) INFO: 41epoch:train:201-300batch: iter_time=8.162e-05, forward_time=0.302, loss_ctc=44.194, loss_att=49.320, acc=0.751, loss=47.782, backward_time=0.303, grad_norm=43.502, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.581e-04, train_time=1.335 +[gpub010:0/16] 2024-02-10 19:47:27,534 (trainer:762) INFO: 
41epoch:train:301-400batch: iter_time=0.001, forward_time=0.322, loss_ctc=43.535, loss_att=40.460, acc=0.766, loss=41.382, backward_time=0.316, grad_norm=40.311, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.581e-04, train_time=1.266 +[gpub010:0/16] 2024-02-10 19:49:28,184 (trainer:762) INFO: 41epoch:train:401-500batch: iter_time=7.992e-05, forward_time=0.337, loss_ctc=48.269, loss_att=46.888, acc=0.759, loss=47.302, backward_time=0.307, grad_norm=45.753, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.581e-04, train_time=1.206 +[gpub010:0/16] 2024-02-10 19:51:38,073 (trainer:762) INFO: 41epoch:train:501-600batch: iter_time=9.111e-05, forward_time=0.317, loss_ctc=48.053, loss_att=40.673, acc=0.767, loss=42.887, backward_time=0.325, grad_norm=48.486, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.581e-04, train_time=1.298 +[gpub010:0/16] 2024-02-10 19:53:34,919 (trainer:762) INFO: 41epoch:train:601-700batch: iter_time=1.001e-04, forward_time=0.331, loss_ctc=45.160, loss_att=47.571, acc=0.741, loss=46.848, backward_time=0.312, grad_norm=46.600, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.580e-04, train_time=1.168 +[gpub010:0/16] 2024-02-10 19:55:42,828 (trainer:762) INFO: 41epoch:train:701-800batch: iter_time=8.349e-05, forward_time=0.311, loss_ctc=44.801, loss_att=41.916, acc=0.774, loss=42.782, backward_time=0.311, grad_norm=41.548, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.580e-04, train_time=1.279 +[gpub010:0/16] 2024-02-10 19:57:46,980 (trainer:762) INFO: 41epoch:train:801-900batch: iter_time=8.284e-04, forward_time=0.317, loss_ctc=45.771, loss_att=47.817, acc=0.752, loss=47.203, backward_time=0.319, grad_norm=42.683, clip=100.000, loss_scale=8.152e+33, optim_step_time=0.096, optim0_lr0=1.580e-04, train_time=1.241 +[gpub010:0/16] 2024-02-10 19:59:52,137 (trainer:762) INFO: 41epoch:train:901-1000batch: iter_time=8.410e-05, 
forward_time=0.315, loss_ctc=45.355, loss_att=47.770, acc=0.759, loss=47.046, backward_time=0.317, grad_norm=40.960, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.580e-04, train_time=1.251 +[gpub010:0/16] 2024-02-10 20:02:07,931 (trainer:762) INFO: 41epoch:train:1001-1100batch: iter_time=4.428e-04, forward_time=0.330, loss_ctc=49.820, loss_att=49.985, acc=0.738, loss=49.936, backward_time=0.312, grad_norm=55.714, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.580e-04, train_time=1.357 +[gpub010:0/16] 2024-02-10 20:04:09,476 (trainer:762) INFO: 41epoch:train:1101-1200batch: iter_time=8.704e-05, forward_time=0.309, loss_ctc=52.324, loss_att=49.608, acc=0.746, loss=50.423, backward_time=0.305, grad_norm=59.066, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.580e-04, train_time=1.216 +[gpub010:0/16] 2024-02-10 20:05:32,320 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub010:0/16] 2024-02-10 20:05:51,140 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 20:05:54,696 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 20:05:54,696 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-02-10 20:05:54,716 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 20:11:26,965 (trainer:762) INFO: 
41epoch:train:1201-1300batch: iter_time=3.111, forward_time=0.350, loss_ctc=48.751, loss_att=46.980, acc=0.754, loss=47.511, backward_time=0.305, grad_norm=45.995, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.580e-04, train_time=4.375 +[gpub010:0/16] 2024-02-10 20:13:28,308 (trainer:762) INFO: 41epoch:train:1301-1400batch: iter_time=8.254e-05, forward_time=0.312, loss_ctc=47.279, loss_att=44.675, acc=0.764, loss=45.456, backward_time=0.310, grad_norm=42.541, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.580e-04, train_time=1.212 +[gpub010:0/16] 2024-02-10 20:15:21,667 (trainer:762) INFO: 41epoch:train:1401-1500batch: iter_time=7.871e-05, forward_time=0.306, loss_ctc=42.538, loss_att=44.213, acc=0.767, loss=43.711, backward_time=0.309, grad_norm=40.087, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.579e-04, train_time=1.134 +[gpub010:0/16] 2024-02-10 20:17:35,093 (trainer:762) INFO: 41epoch:train:1501-1600batch: iter_time=9.267e-05, forward_time=0.331, loss_ctc=46.309, loss_att=50.954, acc=0.753, loss=49.561, backward_time=0.304, grad_norm=44.227, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.579e-04, train_time=1.334 +[gpub010:0/16] 2024-02-10 20:19:27,880 (trainer:762) INFO: 41epoch:train:1601-1700batch: iter_time=8.035e-05, forward_time=0.311, loss_ctc=46.487, loss_att=45.809, acc=0.767, loss=46.012, backward_time=0.305, grad_norm=44.420, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.579e-04, train_time=1.127 +[gpub010:0/16] 2024-02-10 20:21:34,654 (trainer:762) INFO: 41epoch:train:1701-1800batch: iter_time=8.103e-05, forward_time=0.317, loss_ctc=45.735, loss_att=43.344, acc=0.769, loss=44.061, backward_time=0.304, grad_norm=42.224, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.579e-04, train_time=1.268 +[gpub010:0/16] 2024-02-10 20:23:48,734 (trainer:762) INFO: 41epoch:train:1801-1900batch: 
iter_time=8.040e-05, forward_time=0.313, loss_ctc=48.677, loss_att=42.167, acc=0.769, loss=44.120, backward_time=0.315, grad_norm=44.792, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.579e-04, train_time=1.341 +[gpub010:0/16] 2024-02-10 20:25:41,536 (trainer:762) INFO: 41epoch:train:1901-2000batch: iter_time=8.510e-05, forward_time=0.307, loss_ctc=42.524, loss_att=42.001, acc=0.770, loss=42.157, backward_time=0.301, grad_norm=40.871, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.579e-04, train_time=1.128 +[gpub010:0/16] 2024-02-10 20:27:43,126 (trainer:762) INFO: 41epoch:train:2001-2100batch: iter_time=8.800e-05, forward_time=0.316, loss_ctc=46.311, loss_att=49.037, acc=0.773, loss=48.219, backward_time=0.305, grad_norm=44.391, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.579e-04, train_time=1.216 +[gpub010:0/16] 2024-02-10 20:29:45,269 (trainer:762) INFO: 41epoch:train:2101-2200batch: iter_time=8.739e-05, forward_time=0.321, loss_ctc=44.500, loss_att=43.160, acc=0.767, loss=43.562, backward_time=0.307, grad_norm=40.922, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.579e-04, train_time=1.221 +[gpub010:0/16] 2024-02-10 20:31:42,780 (trainer:762) INFO: 41epoch:train:2201-2300batch: iter_time=8.456e-05, forward_time=0.304, loss_ctc=51.680, loss_att=50.477, acc=0.760, loss=50.838, backward_time=0.302, grad_norm=43.841, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.578e-04, train_time=1.175 +[gpub010:0/16] 2024-02-10 20:33:45,802 (trainer:762) INFO: 41epoch:train:2301-2400batch: iter_time=8.162e-05, forward_time=0.307, loss_ctc=41.878, loss_att=45.258, acc=0.755, loss=44.244, backward_time=0.303, grad_norm=45.780, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.578e-04, train_time=1.230 +[gpub010:0/16] 2024-02-10 20:35:48,972 (trainer:762) INFO: 41epoch:train:2401-2500batch: iter_time=6.622e-04, forward_time=0.312, 
loss_ctc=53.112, loss_att=50.445, acc=0.760, loss=51.245, backward_time=0.310, grad_norm=58.830, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.578e-04, train_time=1.232 +[gpub010:0/16] 2024-02-10 20:36:09,000 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub010:0/16] 2024-02-10 20:36:28,226 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 20:36:31,969 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 20:36:31,969 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-02-10 20:36:31,972 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 20:43:18,675 (trainer:762) INFO: 41epoch:train:2501-2600batch: iter_time=3.293, forward_time=0.332, loss_ctc=45.650, loss_att=45.818, acc=0.746, loss=45.767, backward_time=0.310, grad_norm=43.602, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.578e-04, train_time=4.497 +[gpub010:0/16] 2024-02-10 20:45:13,905 (trainer:762) INFO: 41epoch:train:2601-2700batch: iter_time=8.104e-05, forward_time=0.311, loss_ctc=50.417, loss_att=47.147, acc=0.757, loss=48.128, backward_time=0.301, grad_norm=43.992, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.578e-04, train_time=1.151 +[gpub010:0/16] 2024-02-10 20:47:27,262 (trainer:762) INFO: 
41epoch:train:2701-2800batch: iter_time=2.710e-04, forward_time=0.304, loss_ctc=43.034, loss_att=49.417, acc=0.753, loss=47.502, backward_time=0.349, grad_norm=43.210, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.578e-04, train_time=1.334 +[gpub010:0/16] 2024-02-10 20:49:20,456 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-10 20:49:46,708 (trainer:762) INFO: 41epoch:train:2801-2900batch: iter_time=8.805e-05, forward_time=0.323, loss_ctc=42.505, loss_att=40.764, acc=0.767, loss=41.286, backward_time=0.314, grad_norm=40.753, clip=100.000, loss_scale=1.395e+34, optim_step_time=0.096, optim0_lr0=1.578e-04, train_time=1.394 +[gpub010:0/16] 2024-02-10 20:51:44,672 (trainer:762) INFO: 41epoch:train:2901-3000batch: iter_time=8.462e-05, forward_time=0.299, loss_ctc=46.623, loss_att=46.310, acc=0.764, loss=46.404, backward_time=0.308, grad_norm=43.537, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.577e-04, train_time=1.179 +[gpub010:0/16] 2024-02-10 20:53:44,844 (trainer:762) INFO: 41epoch:train:3001-3100batch: iter_time=8.452e-05, forward_time=0.321, loss_ctc=46.776, loss_att=39.973, acc=0.772, loss=42.014, backward_time=0.298, grad_norm=42.600, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.577e-04, train_time=1.201 +[gpub010:0/16] 2024-02-10 20:56:04,028 (trainer:762) INFO: 41epoch:train:3101-3200batch: iter_time=8.046e-05, forward_time=0.326, loss_ctc=44.242, loss_att=47.014, acc=0.745, loss=46.183, backward_time=0.322, grad_norm=44.614, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.577e-04, train_time=1.392 +[gpub010:0/16] 2024-02-10 20:58:07,825 (trainer:762) INFO: 41epoch:train:3201-3300batch: iter_time=8.414e-05, forward_time=0.315, loss_ctc=44.014, loss_att=41.446, acc=0.779, loss=42.216, backward_time=0.299, grad_norm=37.812, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.577e-04, 
train_time=1.238 +[gpub010:0/16] 2024-02-10 21:00:15,707 (trainer:762) INFO: 41epoch:train:3301-3400batch: iter_time=1.652e-04, forward_time=0.329, loss_ctc=44.940, loss_att=47.470, acc=0.753, loss=46.711, backward_time=0.304, grad_norm=42.998, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.577e-04, train_time=1.279 +[gpub010:0/16] 2024-02-10 21:02:23,358 (trainer:762) INFO: 41epoch:train:3401-3500batch: iter_time=7.935e-05, forward_time=0.334, loss_ctc=44.517, loss_att=46.791, acc=0.764, loss=46.109, backward_time=0.313, grad_norm=40.666, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.577e-04, train_time=1.276 +[gpub010:0/16] 2024-02-10 21:04:21,738 (trainer:762) INFO: 41epoch:train:3501-3600batch: iter_time=8.275e-05, forward_time=0.297, loss_ctc=47.366, loss_att=48.313, acc=0.742, loss=48.029, backward_time=0.298, grad_norm=49.681, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.577e-04, train_time=1.184 +[gpub010:0/16] 2024-02-10 21:06:26,650 (trainer:762) INFO: 41epoch:train:3601-3700batch: iter_time=7.541e-04, forward_time=0.336, loss_ctc=50.316, loss_att=48.092, acc=0.746, loss=48.759, backward_time=0.308, grad_norm=55.777, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.577e-04, train_time=1.248 +[gpub010:0/16] 2024-02-10 21:07:54,402 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-02-10 21:08:13,604 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 21:08:17,101 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 21:08:17,101 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-02-10 21:08:17,129 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 21:13:36,899 (trainer:762) INFO: 41epoch:train:3701-3800batch: iter_time=3.025, forward_time=0.316, loss_ctc=48.053, loss_att=46.740, acc=0.745, loss=47.134, backward_time=0.305, grad_norm=44.964, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.576e-04, train_time=4.303 +[gpub010:0/16] 2024-02-10 21:13:46,422 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-10 21:15:37,289 (trainer:762) INFO: 41epoch:train:3801-3900batch: iter_time=7.835e-05, forward_time=0.313, loss_ctc=46.573, loss_att=44.651, acc=0.757, loss=45.227, backward_time=0.300, grad_norm=42.620, clip=100.000, loss_scale=5.612e+33, optim_step_time=0.093, optim0_lr0=1.576e-04, train_time=1.204 +[gpub010:0/16] 2024-02-10 21:17:46,402 (trainer:762) INFO: 41epoch:train:3901-4000batch: iter_time=8.574e-05, forward_time=0.333, loss_ctc=42.019, loss_att=42.629, acc=0.771, loss=42.446, backward_time=0.308, grad_norm=40.160, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.576e-04, train_time=1.290 +[gpub010:0/16] 2024-02-10 21:19:48,755 (trainer:762) INFO: 41epoch:train:4001-4100batch: iter_time=9.152e-05, forward_time=0.312, loss_ctc=45.509, loss_att=49.962, acc=0.753, loss=48.626, backward_time=0.309, grad_norm=46.089, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.576e-04, train_time=1.225 +[gpub010:0/16] 2024-02-10 21:21:42,396 (trainer:762) INFO: 41epoch:train:4101-4200batch: iter_time=8.879e-05, forward_time=0.306, loss_ctc=45.709, loss_att=45.090, acc=0.766, loss=45.276, backward_time=0.298, grad_norm=44.018, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.576e-04, train_time=1.136 +[gpub010:0/16] 2024-02-10 21:23:53,517 (trainer:762) INFO: 41epoch:train:4201-4300batch: iter_time=8.309e-05, forward_time=0.300, loss_ctc=44.767, loss_att=42.535, acc=0.763, loss=43.205, backward_time=0.311, grad_norm=41.316, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.576e-04, train_time=1.311 +[gpub010:0/16] 2024-02-10 21:25:54,647 (trainer:762) INFO: 41epoch:train:4301-4400batch: iter_time=8.033e-05, forward_time=0.341, loss_ctc=48.413, loss_att=42.025, acc=0.771, loss=43.941, backward_time=0.313, grad_norm=44.315, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.576e-04, train_time=1.210 +[gpub010:0/16] 2024-02-10 21:27:58,542 
(trainer:762) INFO: 41epoch:train:4401-4500batch: iter_time=9.245e-05, forward_time=0.309, loss_ctc=42.534, loss_att=42.407, acc=0.763, loss=42.445, backward_time=0.301, grad_norm=41.493, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.576e-04, train_time=1.240 +[gpub010:0/16] 2024-02-10 21:30:16,294 (trainer:762) INFO: 41epoch:train:4501-4600batch: iter_time=9.159e-05, forward_time=0.321, loss_ctc=45.914, loss_att=47.697, acc=0.765, loss=47.162, backward_time=0.311, grad_norm=44.030, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.575e-04, train_time=1.377 +[gpub010:0/16] 2024-02-10 21:32:10,590 (trainer:762) INFO: 41epoch:train:4601-4700batch: iter_time=8.800e-05, forward_time=0.302, loss_ctc=44.030, loss_att=43.949, acc=0.754, loss=43.974, backward_time=0.300, grad_norm=40.223, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.575e-04, train_time=1.143 +[gpub010:0/16] 2024-02-10 21:34:20,917 (trainer:762) INFO: 41epoch:train:4701-4800batch: iter_time=8.566e-05, forward_time=0.308, loss_ctc=50.588, loss_att=50.407, acc=0.752, loss=50.462, backward_time=0.310, grad_norm=47.415, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.575e-04, train_time=1.302 +[gpub010:0/16] 2024-02-10 21:35:43,665 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-10 21:36:17,347 (trainer:762) INFO: 41epoch:train:4801-4900batch: iter_time=9.516e-05, forward_time=0.304, loss_ctc=41.912, loss_att=44.292, acc=0.749, loss=43.578, backward_time=0.314, grad_norm=48.422, clip=100.000, loss_scale=4.458e+33, optim_step_time=0.095, optim0_lr0=1.575e-04, train_time=1.165 +[gpub010:0/16] 2024-02-10 21:38:25,154 (trainer:762) INFO: 41epoch:train:4901-5000batch: iter_time=8.164e-05, forward_time=0.314, loss_ctc=51.818, loss_att=48.494, acc=0.750, loss=49.491, backward_time=0.304, grad_norm=59.340, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.575e-04, train_time=1.278 +[gpub010:0/16] 2024-02-10 21:38:45,182 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub010:0/16] 2024-02-10 21:39:04,477 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 21:39:08,051 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 21:39:08,051 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-02-10 21:39:08,056 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 21:45:53,719 (trainer:762) INFO: 41epoch:train:5001-5100batch: iter_time=3.179, forward_time=0.310, loss_ctc=45.499, loss_att=45.030, acc=0.751, loss=45.171, backward_time=0.305, grad_norm=43.447, clip=100.000, loss_scale=2.596e+33, 
optim_step_time=0.098, optim0_lr0=1.575e-04, train_time=4.485 +[gpub010:0/16] 2024-02-10 21:47:55,390 (trainer:762) INFO: 41epoch:train:5101-5200batch: iter_time=8.054e-05, forward_time=0.318, loss_ctc=49.985, loss_att=46.346, acc=0.760, loss=47.438, backward_time=0.320, grad_norm=44.695, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.575e-04, train_time=1.216 +[gpub010:0/16] 2024-02-10 21:50:00,158 (trainer:762) INFO: 41epoch:train:5201-5300batch: iter_time=7.821e-05, forward_time=0.302, loss_ctc=42.526, loss_att=49.222, acc=0.755, loss=47.213, backward_time=0.303, grad_norm=42.114, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.574e-04, train_time=1.248 +[gpub010:0/16] 2024-02-10 21:51:57,031 (trainer:762) INFO: 41epoch:train:5301-5400batch: iter_time=7.551e-05, forward_time=0.292, loss_ctc=42.335, loss_att=40.132, acc=0.770, loss=40.793, backward_time=0.296, grad_norm=39.490, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.574e-04, train_time=1.168 +[gpub010:0/16] 2024-02-10 21:53:57,796 (trainer:762) INFO: 41epoch:train:5401-5500batch: iter_time=7.805e-05, forward_time=0.308, loss_ctc=45.837, loss_att=45.744, acc=0.769, loss=45.772, backward_time=0.312, grad_norm=42.509, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.574e-04, train_time=1.208 +[gpub010:0/16] 2024-02-10 21:56:08,746 (trainer:762) INFO: 41epoch:train:5501-5600batch: iter_time=8.206e-05, forward_time=0.347, loss_ctc=46.830, loss_att=39.784, acc=0.774, loss=41.898, backward_time=0.304, grad_norm=43.962, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.574e-04, train_time=1.309 +[gpub010:0/16] 2024-02-10 21:58:04,715 (trainer:762) INFO: 41epoch:train:5601-5700batch: iter_time=8.178e-05, forward_time=0.299, loss_ctc=43.787, loss_att=46.579, acc=0.747, loss=45.741, backward_time=0.298, grad_norm=43.978, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, 
optim0_lr0=1.574e-04, train_time=1.160 +[gpub010:0/16] 2024-02-10 22:00:06,630 (trainer:762) INFO: 41epoch:train:5701-5800batch: iter_time=8.269e-05, forward_time=0.291, loss_ctc=43.793, loss_att=41.052, acc=0.780, loss=41.874, backward_time=0.296, grad_norm=37.757, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.574e-04, train_time=1.219 +[gpub010:0/16] 2024-02-10 22:02:09,142 (trainer:762) INFO: 41epoch:train:5801-5900batch: iter_time=8.299e-05, forward_time=0.307, loss_ctc=44.735, loss_att=47.289, acc=0.754, loss=46.523, backward_time=0.313, grad_norm=43.293, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.574e-04, train_time=1.225 +[gpub010:0/16] 2024-02-10 22:04:17,904 (trainer:762) INFO: 41epoch:train:5901-6000batch: iter_time=2.121e-04, forward_time=0.319, loss_ctc=44.428, loss_att=46.530, acc=0.766, loss=45.899, backward_time=0.312, grad_norm=42.209, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.574e-04, train_time=1.286 +[gpub010:0/16] 2024-02-10 22:06:15,503 (trainer:762) INFO: 41epoch:train:6001-6100batch: iter_time=8.121e-05, forward_time=0.299, loss_ctc=48.001, loss_att=49.336, acc=0.744, loss=48.936, backward_time=0.302, grad_norm=50.070, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.573e-04, train_time=1.177 +[gpub010:0/16] 2024-02-10 22:08:24,152 (trainer:762) INFO: 41epoch:train:6101-6200batch: iter_time=7.943e-05, forward_time=0.291, loss_ctc=49.436, loss_att=47.068, acc=0.748, loss=47.778, backward_time=0.294, grad_norm=55.856, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.573e-04, train_time=1.286 +[gpub010:0/16] 2024-02-10 22:09:43,015 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub010:0/16] 2024-02-10 22:10:01,923 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 22:10:05,395 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 22:10:05,395 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-02-10 22:10:05,400 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 22:14:43,658 (trainer:762) INFO: 41epoch:train:6201-6300batch: iter_time=2.586, forward_time=0.353, loss_ctc=48.059, loss_att=47.031, acc=0.755, loss=47.340, backward_time=0.303, grad_norm=45.940, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.573e-04, train_time=3.795 +[gpub010:0/16] 2024-02-10 22:16:52,560 (trainer:762) INFO: 41epoch:train:6301-6400batch: iter_time=8.033e-05, forward_time=0.301, loss_ctc=46.527, loss_att=44.584, acc=0.766, loss=45.167, backward_time=0.305, grad_norm=42.326, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.573e-04, train_time=1.289 +[gpub010:0/16] 2024-02-10 22:18:51,463 (trainer:762) INFO: 41epoch:train:6401-6500batch: iter_time=8.040e-05, forward_time=0.291, loss_ctc=41.920, loss_att=43.404, acc=0.771, loss=42.959, backward_time=0.296, grad_norm=40.313, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.573e-04, train_time=1.189 +[gpub010:0/16] 2024-02-10 22:21:04,165 (trainer:762) INFO: 
41epoch:train:6501-6600batch: iter_time=9.686e-05, forward_time=0.345, loss_ctc=45.443, loss_att=49.579, acc=0.764, loss=48.338, backward_time=0.310, grad_norm=44.294, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.573e-04, train_time=1.327 +[gpub010:0/16] 2024-02-10 22:23:00,135 (trainer:762) INFO: 41epoch:train:6601-6700batch: iter_time=8.110e-05, forward_time=0.292, loss_ctc=45.260, loss_att=45.293, acc=0.772, loss=45.283, backward_time=0.296, grad_norm=40.949, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.573e-04, train_time=1.159 +[gpub010:0/16] 2024-02-10 22:25:10,022 (trainer:762) INFO: 41epoch:train:6701-6800batch: iter_time=8.471e-05, forward_time=0.306, loss_ctc=44.328, loss_att=43.353, acc=0.771, loss=43.646, backward_time=0.296, grad_norm=41.619, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.573e-04, train_time=1.299 +[gpub010:0/16] 2024-02-10 22:27:02,669 (trainer:762) INFO: 41epoch:train:6801-6900batch: iter_time=8.815e-05, forward_time=0.293, loss_ctc=47.286, loss_att=40.869, acc=0.778, loss=42.794, backward_time=0.299, grad_norm=43.604, clip=100.000, loss_scale=3.323e+33, optim_step_time=0.093, optim0_lr0=1.572e-04, train_time=1.126 +[gpub010:0/16] 2024-02-10 22:29:17,898 (trainer:762) INFO: 41epoch:train:6901-7000batch: iter_time=8.309e-05, forward_time=0.350, loss_ctc=42.270, loss_att=41.882, acc=0.773, loss=41.998, backward_time=0.327, grad_norm=40.371, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.572e-04, train_time=1.352 +[gpub010:0/16] 2024-02-10 22:31:17,208 (trainer:762) INFO: 41epoch:train:7001-7100batch: iter_time=8.327e-05, forward_time=0.307, loss_ctc=45.943, loss_att=49.107, acc=0.777, loss=48.157, backward_time=0.301, grad_norm=45.557, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.572e-04, train_time=1.193 +[gpub010:0/16] 2024-02-10 22:33:24,326 (trainer:762) INFO: 41epoch:train:7101-7200batch: 
iter_time=9.230e-05, forward_time=0.293, loss_ctc=43.691, loss_att=43.008, acc=0.770, loss=43.213, backward_time=0.297, grad_norm=39.281, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.572e-04, train_time=1.271 +[gpub010:0/16] 2024-02-10 22:35:20,535 (trainer:762) INFO: 41epoch:train:7201-7300batch: iter_time=8.235e-05, forward_time=0.297, loss_ctc=50.618, loss_att=50.749, acc=0.761, loss=50.710, backward_time=0.302, grad_norm=46.131, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.572e-04, train_time=1.162 +[gpub010:0/16] 2024-02-10 22:37:33,727 (trainer:762) INFO: 41epoch:train:7301-7400batch: iter_time=8.867e-05, forward_time=0.360, loss_ctc=41.300, loss_att=45.070, acc=0.758, loss=43.939, backward_time=0.314, grad_norm=45.536, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.572e-04, train_time=1.331 +[gpub010:0/16] 2024-02-10 22:39:45,841 (trainer:762) INFO: 41epoch:train:7401-7500batch: iter_time=8.441e-05, forward_time=0.293, loss_ctc=51.915, loss_att=49.113, acc=0.764, loss=49.954, backward_time=0.297, grad_norm=58.716, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.572e-04, train_time=1.321 +[gpub010:0/16] 2024-02-10 22:40:05,997 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub010:0/16] 2024-02-10 22:40:25,136 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 22:40:28,686 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 22:40:28,686 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-02-10 22:40:28,773 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 22:46:58,118 (trainer:762) INFO: 41epoch:train:7501-7600batch: iter_time=3.100, forward_time=0.344, loss_ctc=45.223, loss_att=46.047, acc=0.748, loss=45.800, backward_time=0.312, grad_norm=43.582, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.571e-04, train_time=4.323 +[gpub010:0/16] 2024-02-10 22:48:58,807 (trainer:762) INFO: 41epoch:train:7601-7700batch: iter_time=7.838e-05, forward_time=0.299, loss_ctc=50.055, loss_att=47.719, acc=0.758, loss=48.420, backward_time=0.303, grad_norm=44.562, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.571e-04, train_time=1.206 +[gpub010:0/16] 2024-02-10 22:51:02,950 (trainer:762) INFO: 41epoch:train:7701-7800batch: iter_time=8.245e-05, forward_time=0.291, loss_ctc=42.239, loss_att=48.801, acc=0.757, loss=46.832, backward_time=0.295, grad_norm=42.619, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.571e-04, train_time=1.241 +[gpub010:0/16] 2024-02-10 22:53:00,348 (trainer:762) INFO: 
41epoch:train:7801-7900batch: iter_time=8.234e-05, forward_time=0.300, loss_ctc=42.042, loss_att=39.716, acc=0.772, loss=40.414, backward_time=0.306, grad_norm=39.709, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.571e-04, train_time=1.174 +[gpub010:0/16] 2024-02-10 22:55:10,683 (trainer:762) INFO: 41epoch:train:7901-8000batch: iter_time=8.010e-05, forward_time=0.333, loss_ctc=45.665, loss_att=45.803, acc=0.767, loss=45.761, backward_time=0.317, grad_norm=42.371, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.571e-04, train_time=1.303 +[gpub010:0/16] 2024-02-10 22:59:16,774 (trainer:762) INFO: 41epoch:train:8001-8100batch: iter_time=0.667, forward_time=0.301, loss_ctc=46.394, loss_att=39.329, acc=0.775, loss=41.449, backward_time=0.296, grad_norm=44.005, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.571e-04, train_time=2.460 +[gpub010:0/16] 2024-02-10 22:59:39,979 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-10 23:01:55,290 (trainer:762) INFO: 41epoch:train:8101-8200batch: iter_time=0.180, forward_time=0.297, loss_ctc=43.237, loss_att=46.092, acc=0.749, loss=45.236, backward_time=0.300, grad_norm=43.038, clip=100.000, loss_scale=2.963e+33, optim_step_time=0.094, optim0_lr0=1.571e-04, train_time=1.585 +[gpub010:0/16] 2024-02-10 23:05:53,360 (trainer:762) INFO: 41epoch:train:8201-8300batch: iter_time=8.282e-05, forward_time=0.340, loss_ctc=43.717, loss_att=41.222, acc=0.780, loss=41.970, backward_time=0.671, grad_norm=36.631, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.571e-04, train_time=2.380 +[gpub010:0/16] 2024-02-10 23:10:08,098 (trainer:762) INFO: 41epoch:train:8301-8400batch: iter_time=8.618e-05, forward_time=0.317, loss_ctc=44.493, loss_att=46.960, acc=0.757, loss=46.220, backward_time=0.301, grad_norm=41.041, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.570e-04, train_time=2.547 +[gpub010:0/16] 2024-02-10 23:12:10,092 (trainer:762) INFO: 41epoch:train:8401-8500batch: iter_time=8.465e-05, forward_time=0.292, loss_ctc=44.107, loss_att=46.685, acc=0.766, loss=45.912, backward_time=0.297, grad_norm=42.209, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.570e-04, train_time=1.219 +[gpub010:0/16] 2024-02-10 23:14:01,953 (trainer:762) INFO: 41epoch:train:8501-8600batch: iter_time=7.862e-05, forward_time=0.302, loss_ctc=46.911, loss_att=48.329, acc=0.745, loss=47.904, backward_time=0.303, grad_norm=48.536, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.570e-04, train_time=1.119 +[gpub010:0/16] 2024-02-10 23:16:28,280 (trainer:762) INFO: 41epoch:train:8601-8700batch: iter_time=4.394e-04, forward_time=0.357, loss_ctc=49.187, loss_att=46.495, acc=0.752, loss=47.303, backward_time=0.308, grad_norm=58.254, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.570e-04, train_time=1.463 +[gpub010:0/16] 2024-02-10 23:17:50,831 
(multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub010:0/16] 2024-02-10 23:18:09,819 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 23:18:13,353 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 23:18:13,353 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-02-10 23:18:13,402 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-10 23:23:46,845 (trainer:762) INFO: 41epoch:train:8701-8800batch: iter_time=3.202, forward_time=0.304, loss_ctc=47.601, loss_att=46.233, acc=0.759, loss=46.643, backward_time=0.297, grad_norm=46.308, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.570e-04, train_time=4.386 +[gpub010:0/16] 2024-02-10 23:25:53,432 (trainer:762) INFO: 41epoch:train:8801-8900batch: iter_time=8.452e-05, forward_time=0.349, loss_ctc=45.681, loss_att=43.820, acc=0.769, loss=44.378, backward_time=0.312, grad_norm=42.085, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.570e-04, train_time=1.266 +[gpub010:0/16] 2024-02-10 23:27:59,399 (trainer:762) INFO: 41epoch:train:8901-9000batch: iter_time=8.737e-05, forward_time=0.298, loss_ctc=42.084, loss_att=43.948, acc=0.772, loss=43.389, backward_time=0.302, grad_norm=42.344, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.570e-04, 
train_time=1.259 +[gpub010:0/16] 2024-02-10 23:29:58,208 (trainer:762) INFO: 41epoch:train:9001-9100batch: iter_time=8.736e-05, forward_time=0.307, loss_ctc=45.103, loss_att=49.841, acc=0.758, loss=48.420, backward_time=0.301, grad_norm=44.090, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.570e-04, train_time=1.188 +[gpub010:0/16] 2024-02-10 23:32:04,419 (trainer:762) INFO: 41epoch:train:9101-9200batch: iter_time=8.893e-05, forward_time=0.327, loss_ctc=44.622, loss_att=45.027, acc=0.771, loss=44.905, backward_time=0.328, grad_norm=42.667, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.569e-04, train_time=1.262 +[gpub010:0/16] 2024-02-10 23:37:05,940 (trainer:762) INFO: 41epoch:train:9201-9300batch: iter_time=0.383, forward_time=0.293, loss_ctc=44.558, loss_att=42.361, acc=0.773, loss=43.020, backward_time=0.299, grad_norm=41.601, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.569e-04, train_time=3.015 +[gpub010:0/16] 2024-02-10 23:39:03,542 (trainer:762) INFO: 41epoch:train:9301-9400batch: iter_time=8.445e-05, forward_time=0.301, loss_ctc=47.181, loss_att=41.155, acc=0.776, loss=42.963, backward_time=0.304, grad_norm=42.528, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.569e-04, train_time=1.176 +[gpub010:0/16] 2024-02-10 23:41:12,626 (trainer:762) INFO: 41epoch:train:9401-9500batch: iter_time=9.848e-05, forward_time=0.348, loss_ctc=42.262, loss_att=41.528, acc=0.775, loss=41.748, backward_time=0.323, grad_norm=40.446, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.569e-04, train_time=1.290 +[gpub010:0/16] 2024-02-10 23:43:17,703 (trainer:762) INFO: 41epoch:train:9501-9600batch: iter_time=9.281e-05, forward_time=0.296, loss_ctc=45.263, loss_att=48.378, acc=0.778, loss=47.444, backward_time=0.301, grad_norm=40.897, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.569e-04, train_time=1.250 +[gpub010:0/16] 2024-02-10 
23:46:57,199 (trainer:762) INFO: 41epoch:train:9601-9700batch: iter_time=9.807e-05, forward_time=0.379, loss_ctc=43.394, loss_att=42.677, acc=0.772, loss=42.892, backward_time=0.503, grad_norm=41.035, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.157, optim0_lr0=1.569e-04, train_time=2.190 +[gpub010:0/16] 2024-02-10 23:49:27,632 (trainer:762) INFO: 41epoch:train:9701-9800batch: iter_time=0.005, forward_time=0.425, loss_ctc=50.146, loss_att=50.293, acc=0.763, loss=50.249, backward_time=0.310, grad_norm=44.605, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.569e-04, train_time=1.510 +[gpub010:0/16] 2024-02-10 23:51:20,252 (trainer:762) INFO: 41epoch:train:9801-9900batch: iter_time=8.064e-05, forward_time=0.297, loss_ctc=40.245, loss_att=44.392, acc=0.760, loss=43.148, backward_time=0.302, grad_norm=45.085, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.569e-04, train_time=1.126 +[gpub010:0/16] 2024-02-10 23:53:33,566 (trainer:762) INFO: 41epoch:train:9901-10000batch: iter_time=4.624e-04, forward_time=0.356, loss_ctc=51.038, loss_att=48.244, acc=0.765, loss=49.082, backward_time=0.337, grad_norm=55.557, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.568e-04, train_time=1.333 +[gpub010:0/16] 2024-02-10 23:53:53,645 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub010:0/16] 2024-02-10 23:54:12,795 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-10 23:54:16,365 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-10 23:54:16,365 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-02-10 23:54:16,370 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 00:00:53,277 (trainer:762) INFO: 41epoch:train:10001-10100batch: iter_time=3.245, forward_time=0.310, loss_ctc=45.086, loss_att=44.004, acc=0.767, loss=44.328, backward_time=0.298, grad_norm=40.928, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.568e-04, train_time=4.397 +[gpub010:0/16] 2024-02-11 00:03:02,825 (trainer:762) INFO: 41epoch:train:10101-10200batch: iter_time=8.898e-05, forward_time=0.321, loss_ctc=49.653, loss_att=47.143, acc=0.763, loss=47.896, backward_time=0.332, grad_norm=43.861, clip=100.000, loss_scale=4.803e+33, optim_step_time=0.097, optim0_lr0=1.568e-04, train_time=1.295 +[gpub010:0/16] 2024-02-11 00:05:18,245 (trainer:762) INFO: 41epoch:train:10201-10300batch: iter_time=8.261e-05, forward_time=0.310, loss_ctc=42.112, loss_att=47.902, acc=0.770, loss=46.165, backward_time=0.307, grad_norm=41.327, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.568e-04, train_time=1.355 +[gpub010:0/16] 2024-02-11 00:07:09,778 
(trainer:762) INFO: 41epoch:train:10301-10400batch: iter_time=8.437e-05, forward_time=0.291, loss_ctc=41.667, loss_att=39.203, acc=0.779, loss=39.943, backward_time=0.296, grad_norm=38.940, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.568e-04, train_time=1.115 +[gpub010:0/16] 2024-02-11 00:09:23,679 (trainer:762) INFO: 41epoch:train:10401-10500batch: iter_time=8.692e-05, forward_time=0.315, loss_ctc=45.210, loss_att=45.081, acc=0.778, loss=45.120, backward_time=0.312, grad_norm=43.209, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.568e-04, train_time=1.339 +[gpub010:0/16] 2024-02-11 00:11:32,567 (trainer:762) INFO: 41epoch:train:10501-10600batch: iter_time=8.851e-05, forward_time=0.327, loss_ctc=46.827, loss_att=39.731, acc=0.777, loss=41.860, backward_time=0.306, grad_norm=44.179, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.568e-04, train_time=1.289 +[gpub010:0/16] 2024-02-11 00:13:47,942 (trainer:762) INFO: 41epoch:train:10601-10700batch: iter_time=9.932e-05, forward_time=0.292, loss_ctc=43.220, loss_att=45.785, acc=0.757, loss=45.016, backward_time=0.297, grad_norm=43.973, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.567e-04, train_time=1.354 +[gpub010:0/16] 2024-02-11 00:16:27,207 (trainer:762) INFO: 41epoch:train:10701-10800batch: iter_time=9.607e-05, forward_time=0.521, loss_ctc=43.345, loss_att=40.940, acc=0.789, loss=41.662, backward_time=0.327, grad_norm=36.552, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.567e-04, train_time=1.592 +[gpub010:0/16] 2024-02-11 00:18:40,457 (trainer:762) INFO: 41epoch:train:10801-10900batch: iter_time=8.367e-05, forward_time=0.316, loss_ctc=44.090, loss_att=46.403, acc=0.771, loss=45.709, backward_time=0.305, grad_norm=40.801, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.567e-04, train_time=1.332 +[gpub010:0/16] 2024-02-11 00:20:40,551 (trainer:762) INFO: 
41epoch:train:10901-11000batch: iter_time=8.828e-05, forward_time=0.293, loss_ctc=43.641, loss_att=46.221, acc=0.778, loss=45.447, backward_time=0.299, grad_norm=39.684, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.567e-04, train_time=1.202 +[gpub010:0/16] 2024-02-11 00:22:49,656 (trainer:762) INFO: 41epoch:train:11001-11100batch: iter_time=1.961e-04, forward_time=0.343, loss_ctc=46.557, loss_att=48.799, acc=0.751, loss=48.127, backward_time=0.339, grad_norm=47.692, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.567e-04, train_time=1.291 +[gpub010:0/16] 2024-02-11 00:25:02,573 (trainer:762) INFO: 41epoch:train:11101-11200batch: iter_time=8.271e-05, forward_time=0.307, loss_ctc=48.371, loss_att=46.361, acc=0.764, loss=46.964, backward_time=0.301, grad_norm=58.228, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.567e-04, train_time=1.329 +[gpub010:0/16] 2024-02-11 00:26:31,648 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-02-11 00:26:50,856 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 00:26:54,442 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 00:26:54,442 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-02-11 00:26:54,445 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 00:32:46,058 (trainer:762) INFO: 41epoch:train:11201-11300batch: iter_time=3.291, forward_time=0.363, loss_ctc=47.302, loss_att=45.570, acc=0.768, loss=46.089, backward_time=0.313, grad_norm=44.329, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.567e-04, train_time=4.635 +[gpub010:0/16] 2024-02-11 00:34:57,105 (trainer:762) INFO: 41epoch:train:11301-11400batch: iter_time=8.118e-05, forward_time=0.320, loss_ctc=46.014, loss_att=43.358, acc=0.771, loss=44.155, backward_time=0.314, grad_norm=40.914, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.567e-04, train_time=1.310 +[gpub010:0/16] 2024-02-11 00:36:58,958 (trainer:762) INFO: 41epoch:train:11401-11500batch: iter_time=8.171e-05, forward_time=0.289, loss_ctc=41.474, loss_att=42.628, acc=0.776, loss=42.281, backward_time=0.295, grad_norm=39.888, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.566e-04, train_time=1.219 +[gpub010:0/16] 2024-02-11 00:39:05,053 (trainer:762) 
INFO: 41epoch:train:11501-11600batch: iter_time=8.172e-05, forward_time=0.333, loss_ctc=45.057, loss_att=49.099, acc=0.760, loss=47.887, backward_time=0.316, grad_norm=44.724, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.566e-04, train_time=1.261 +[gpub010:0/16] 2024-02-11 00:41:07,551 (trainer:762) INFO: 41epoch:train:11601-11700batch: iter_time=8.651e-05, forward_time=0.311, loss_ctc=44.551, loss_att=44.779, acc=0.772, loss=44.710, backward_time=0.304, grad_norm=41.629, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.566e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 00:43:26,535 (trainer:762) INFO: 41epoch:train:11701-11800batch: iter_time=8.332e-05, forward_time=0.311, loss_ctc=44.416, loss_att=42.337, acc=0.775, loss=42.961, backward_time=0.305, grad_norm=40.164, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.566e-04, train_time=1.390 +[gpub010:0/16] 2024-02-11 00:45:21,448 (trainer:762) INFO: 41epoch:train:11801-11900batch: iter_time=9.315e-05, forward_time=0.309, loss_ctc=47.161, loss_att=41.428, acc=0.776, loss=43.148, backward_time=0.307, grad_norm=43.989, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.566e-04, train_time=1.148 +[gpub010:0/16] 2024-02-11 00:47:27,373 (trainer:762) INFO: 41epoch:train:11901-12000batch: iter_time=8.813e-05, forward_time=0.321, loss_ctc=41.682, loss_att=40.991, acc=0.776, loss=41.198, backward_time=0.305, grad_norm=39.336, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.566e-04, train_time=1.259 +[gpub010:0/16] 2024-02-11 00:49:46,909 (trainer:762) INFO: 41epoch:train:12001-12100batch: iter_time=8.831e-05, forward_time=0.315, loss_ctc=45.493, loss_att=48.231, acc=0.778, loss=47.409, backward_time=0.327, grad_norm=43.442, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.566e-04, train_time=1.396 +[gpub010:0/16] 2024-02-11 00:51:39,051 (trainer:762) INFO: 
41epoch:train:12101-12200batch: iter_time=8.498e-05, forward_time=0.307, loss_ctc=43.511, loss_att=42.400, acc=0.773, loss=42.733, backward_time=0.310, grad_norm=38.933, clip=100.000, loss_scale=9.606e+33, optim_step_time=0.094, optim0_lr0=1.566e-04, train_time=1.121 +[gpub010:0/16] 2024-02-11 00:52:45,447 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 00:53:40,910 (trainer:762) INFO: 41epoch:train:12201-12300batch: iter_time=8.215e-05, forward_time=0.293, loss_ctc=49.488, loss_att=49.655, acc=0.765, loss=49.605, backward_time=0.299, grad_norm=43.264, clip=100.000, loss_scale=8.024e+33, optim_step_time=0.093, optim0_lr0=1.565e-04, train_time=1.217 +[gpub010:0/16] 2024-02-11 00:55:57,350 (trainer:762) INFO: 41epoch:train:12301-12400batch: iter_time=9.075e-05, forward_time=0.339, loss_ctc=39.951, loss_att=44.184, acc=0.762, loss=42.914, backward_time=0.312, grad_norm=44.250, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.565e-04, train_time=1.365 +[gpub010:0/16] 2024-02-11 00:57:52,183 (trainer:762) INFO: 41epoch:train:12401-12500batch: iter_time=8.563e-05, forward_time=0.309, loss_ctc=50.484, loss_att=49.381, acc=0.763, loss=49.712, backward_time=0.308, grad_norm=60.607, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.565e-04, train_time=1.148 +[gpub010:0/16] 2024-02-11 00:58:12,222 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-02-11 00:58:31,639 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 00:58:35,161 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 00:58:35,161 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-02-11 00:58:35,217 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 01:05:52,457 (trainer:762) INFO: 41epoch:train:12501-12600batch: iter_time=3.463, forward_time=0.351, loss_ctc=44.787, loss_att=46.147, acc=0.750, loss=45.739, backward_time=0.303, grad_norm=42.408, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.565e-04, train_time=4.802 +[gpub010:0/16] 2024-02-11 01:07:50,456 (trainer:762) INFO: 41epoch:train:12601-12700batch: iter_time=7.935e-05, forward_time=0.321, loss_ctc=49.537, loss_att=47.301, acc=0.760, loss=47.971, backward_time=0.306, grad_norm=44.834, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.565e-04, train_time=1.180 +[gpub010:0/16] 2024-02-11 01:10:10,142 (trainer:762) INFO: 41epoch:train:12701-12800batch: iter_time=8.520e-05, forward_time=0.316, loss_ctc=42.348, loss_att=49.760, acc=0.755, loss=47.536, backward_time=0.316, grad_norm=43.690, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.565e-04, train_time=1.397 +[gpub010:0/16] 2024-02-11 01:12:07,835 (trainer:762) 
INFO: 41epoch:train:12801-12900batch: iter_time=8.305e-05, forward_time=0.301, loss_ctc=41.717, loss_att=39.855, acc=0.773, loss=40.414, backward_time=0.306, grad_norm=37.976, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.565e-04, train_time=1.176 +[gpub010:0/16] 2024-02-11 01:14:18,372 (trainer:762) INFO: 41epoch:train:12901-13000batch: iter_time=8.827e-05, forward_time=0.306, loss_ctc=45.529, loss_att=45.959, acc=0.770, loss=45.830, backward_time=0.305, grad_norm=42.638, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.565e-04, train_time=1.305 +[gpub010:0/16] 2024-02-11 01:16:31,159 (trainer:762) INFO: 41epoch:train:13001-13100batch: iter_time=8.153e-05, forward_time=0.325, loss_ctc=46.154, loss_att=39.685, acc=0.776, loss=41.626, backward_time=0.308, grad_norm=43.496, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.564e-04, train_time=1.328 +[gpub010:0/16] 2024-02-11 01:18:29,571 (trainer:762) INFO: 41epoch:train:13101-13200batch: iter_time=8.392e-05, forward_time=0.305, loss_ctc=43.214, loss_att=46.714, acc=0.747, loss=45.664, backward_time=0.300, grad_norm=42.539, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.564e-04, train_time=1.183 +[gpub010:0/16] 2024-02-11 01:20:51,976 (trainer:762) INFO: 41epoch:train:13201-13300batch: iter_time=7.984e-05, forward_time=0.333, loss_ctc=43.169, loss_att=41.122, acc=0.781, loss=41.736, backward_time=0.327, grad_norm=38.062, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.564e-04, train_time=1.424 +[gpub010:0/16] 2024-02-11 01:22:54,492 (trainer:762) INFO: 41epoch:train:13301-13400batch: iter_time=8.221e-05, forward_time=0.311, loss_ctc=44.292, loss_att=47.416, acc=0.755, loss=46.479, backward_time=0.310, grad_norm=43.672, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.564e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 01:24:56,566 (trainer:762) INFO: 
41epoch:train:13401-13500batch: iter_time=8.359e-05, forward_time=0.308, loss_ctc=43.805, loss_att=46.313, acc=0.767, loss=45.561, backward_time=0.299, grad_norm=39.608, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.564e-04, train_time=1.220 +[gpub010:0/16] 2024-02-11 01:27:06,751 (trainer:762) INFO: 41epoch:train:13501-13600batch: iter_time=9.652e-05, forward_time=0.329, loss_ctc=45.821, loss_att=48.672, acc=0.745, loss=47.817, backward_time=0.307, grad_norm=50.215, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.564e-04, train_time=1.302 +[gpub010:0/16] 2024-02-11 01:29:17,830 (trainer:762) INFO: 41epoch:train:13601-13700batch: iter_time=8.404e-05, forward_time=0.311, loss_ctc=48.331, loss_att=46.286, acc=0.751, loss=46.899, backward_time=0.297, grad_norm=54.203, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.564e-04, train_time=1.311 +[gpub010:0/16] 2024-02-11 01:30:39,725 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-02-11 01:30:58,886 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 01:31:02,344 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 01:31:02,344 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-02-11 01:31:02,388 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 01:36:51,855 (trainer:762) INFO: 41epoch:train:13701-13800batch: iter_time=3.328, forward_time=0.309, loss_ctc=47.008, loss_att=46.423, acc=0.757, loss=46.598, backward_time=0.297, grad_norm=44.472, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.564e-04, train_time=4.540 +[gpub010:0/16] 2024-02-11 01:39:01,683 (trainer:762) INFO: 41epoch:train:13801-13900batch: iter_time=8.658e-05, forward_time=0.336, loss_ctc=46.132, loss_att=43.991, acc=0.768, loss=44.634, backward_time=0.309, grad_norm=41.716, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.563e-04, train_time=1.297 +[gpub010:0/16] 2024-02-11 01:41:04,104 (trainer:762) INFO: 41epoch:train:13901-14000batch: iter_time=8.672e-05, forward_time=0.308, loss_ctc=41.416, loss_att=42.779, acc=0.777, loss=42.370, backward_time=0.302, grad_norm=39.875, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.563e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 01:43:00,220 (trainer:762) 
INFO: 41epoch:train:14001-14100batch: iter_time=8.793e-05, forward_time=0.312, loss_ctc=44.840, loss_att=49.054, acc=0.766, loss=47.790, backward_time=0.302, grad_norm=43.976, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.563e-04, train_time=1.161 +[gpub010:0/16] 2024-02-11 01:45:21,437 (trainer:762) INFO: 41epoch:train:14101-14200batch: iter_time=9.610e-05, forward_time=0.321, loss_ctc=44.379, loss_att=44.426, acc=0.776, loss=44.412, backward_time=0.312, grad_norm=42.471, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.563e-04, train_time=1.412 +[gpub010:0/16] 2024-02-11 01:47:19,765 (trainer:762) INFO: 41epoch:train:14201-14300batch: iter_time=9.408e-05, forward_time=0.309, loss_ctc=44.048, loss_att=42.652, acc=0.772, loss=43.071, backward_time=0.304, grad_norm=40.971, clip=100.000, loss_scale=7.529e+33, optim_step_time=0.094, optim0_lr0=1.563e-04, train_time=1.183 +[gpub010:0/16] 2024-02-11 01:49:19,621 (trainer:762) INFO: 41epoch:train:14301-14400batch: iter_time=9.441e-05, forward_time=0.326, loss_ctc=46.776, loss_att=40.493, acc=0.780, loss=42.378, backward_time=0.302, grad_norm=42.716, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.563e-04, train_time=1.199 +[gpub010:0/16] 2024-02-11 01:51:42,883 (trainer:762) INFO: 41epoch:train:14401-14500batch: iter_time=9.767e-05, forward_time=0.329, loss_ctc=41.367, loss_att=41.241, acc=0.775, loss=41.279, backward_time=0.310, grad_norm=41.128, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.563e-04, train_time=1.432 +[gpub010:0/16] 2024-02-11 01:53:51,810 (trainer:762) INFO: 41epoch:train:14501-14600batch: iter_time=8.899e-05, forward_time=0.310, loss_ctc=45.379, loss_att=48.013, acc=0.777, loss=47.223, backward_time=0.306, grad_norm=43.069, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.563e-04, train_time=1.289 +[gpub010:0/16] 2024-02-11 01:56:04,822 (trainer:762) INFO: 
41epoch:train:14601-14700batch: iter_time=8.628e-05, forward_time=0.300, loss_ctc=43.290, loss_att=42.411, acc=0.773, loss=42.675, backward_time=0.298, grad_norm=38.228, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.562e-04, train_time=1.328 +[gpub010:0/16] 2024-02-11 01:58:15,932 (trainer:762) INFO: 41epoch:train:14701-14800batch: iter_time=9.261e-05, forward_time=0.315, loss_ctc=49.901, loss_att=50.069, acc=0.763, loss=50.018, backward_time=0.302, grad_norm=47.890, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.562e-04, train_time=1.313 +[gpub010:0/16] 2024-02-11 01:59:33,266 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 02:00:24,563 (trainer:762) INFO: 41epoch:train:14801-14900batch: iter_time=8.792e-05, forward_time=0.333, loss_ctc=39.536, loss_att=44.222, acc=0.761, loss=42.816, backward_time=0.322, grad_norm=43.411, clip=100.000, loss_scale=8.077e+33, optim_step_time=0.096, optim0_lr0=1.562e-04, train_time=1.286 +[gpub010:0/16] 2024-02-11 02:00:47,454 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-11 02:02:33,332 (trainer:762) INFO: 41epoch:train:14901-15000batch: iter_time=9.346e-05, forward_time=0.315, loss_ctc=51.170, loss_att=49.525, acc=0.767, loss=50.018, backward_time=0.304, grad_norm=63.571, clip=100.000, loss_scale=3.042e+33, optim_step_time=0.098, optim0_lr0=1.562e-04, train_time=1.288 +[gpub010:0/16] 2024-02-11 02:40:52,914 (trainer:361) INFO: 41epoch results: [train] iter_time=0.258, forward_time=0.318, loss_ctc=45.625, loss_att=45.425, acc=0.764, loss=45.485, backward_time=0.311, grad_norm=44.193, clip=100.000, loss_scale=5.791e+33, optim_step_time=0.096, optim0_lr0=1.572e-04, train_time=1.552, time=6 hours, 28 minutes and 21.76 seconds, total_count=645000, gpu_max_cached_mem_GB=42.092, [valid] loss_ctc=35.638, cer_ctc=0.182, loss_att=37.594, acc=0.693, cer=0.317, wer=0.986, loss=37.007, time=37 minutes and 54.45 seconds, total_count=200853, gpu_max_cached_mem_GB=42.092 +[gpub010:0/16] 2024-02-11 02:41:01,736 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-02-11 02:41:01,827 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/36epoch.pth +[gpub010:0/16] 2024-02-11 02:41:01,828 (trainer:290) INFO: 42/45epoch started. Estimated time to finish: 1 day, 5 hours and 36 minutes +[gpub010:0/16] 2024-02-11 02:41:01,838 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-02-11 02:41:20,365 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 02:41:23,734 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 02:41:23,734 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-02-11 02:41:23,737 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 02:47:54,564 (trainer:762) INFO: 42epoch:train:1-100batch: iter_time=2.999, forward_time=0.319, loss_ctc=50.314, loss_att=43.194, acc=0.754, loss=45.330, backward_time=0.300, grad_norm=49.466, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.562e-04, train_time=4.127 +[gpub010:0/16] 2024-02-11 02:49:57,121 (trainer:762) INFO: 42epoch:train:101-200batch: iter_time=8.272e-05, forward_time=0.297, loss_ctc=48.437, loss_att=50.456, acc=0.743, loss=49.851, backward_time=0.301, grad_norm=47.423, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.562e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 02:52:03,884 (trainer:762) INFO: 42epoch:train:201-300batch: iter_time=8.204e-05, forward_time=0.304, loss_ctc=44.721, loss_att=44.783, acc=0.774, loss=44.764, backward_time=0.301, grad_norm=40.985, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.562e-04, train_time=1.267 +[gpub010:0/16] 2024-02-11 02:54:14,857 (trainer:762) INFO: 
42epoch:train:301-400batch: iter_time=9.269e-05, forward_time=0.368, loss_ctc=47.482, loss_att=50.788, acc=0.746, loss=49.796, backward_time=0.316, grad_norm=48.065, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.561e-04, train_time=1.310 +[gpub010:0/16] 2024-02-11 02:56:22,632 (trainer:762) INFO: 42epoch:train:401-500batch: iter_time=8.626e-05, forward_time=0.310, loss_ctc=47.250, loss_att=52.238, acc=0.733, loss=50.742, backward_time=0.296, grad_norm=42.694, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.561e-04, train_time=1.277 +[gpub010:0/16] 2024-02-11 02:58:26,427 (trainer:762) INFO: 42epoch:train:501-600batch: iter_time=8.388e-05, forward_time=0.291, loss_ctc=43.298, loss_att=46.578, acc=0.756, loss=45.594, backward_time=0.295, grad_norm=40.967, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.561e-04, train_time=1.238 +[gpub010:0/16] 2024-02-11 03:00:35,306 (trainer:762) INFO: 42epoch:train:601-700batch: iter_time=9.109e-05, forward_time=0.351, loss_ctc=34.458, loss_att=29.856, acc=0.799, loss=31.237, backward_time=0.330, grad_norm=34.214, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.107, optim0_lr0=1.561e-04, train_time=1.288 +[gpub010:0/16] 2024-02-11 03:02:41,397 (trainer:762) INFO: 42epoch:train:701-800batch: iter_time=8.150e-05, forward_time=0.303, loss_ctc=50.324, loss_att=48.098, acc=0.758, loss=48.766, backward_time=0.299, grad_norm=47.062, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.561e-04, train_time=1.261 +[gpub010:0/16] 2024-02-11 03:04:55,746 (trainer:762) INFO: 42epoch:train:801-900batch: iter_time=9.491e-05, forward_time=0.302, loss_ctc=51.431, loss_att=50.549, acc=0.736, loss=50.813, backward_time=0.303, grad_norm=52.228, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.561e-04, train_time=1.343 +[gpub010:0/16] 2024-02-11 03:06:59,181 (trainer:762) INFO: 42epoch:train:901-1000batch: iter_time=8.663e-05, 
forward_time=0.300, loss_ctc=38.532, loss_att=37.955, acc=0.772, loss=38.128, backward_time=0.309, grad_norm=39.989, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.561e-04, train_time=1.234 +[gpub010:0/16] 2024-02-11 03:09:11,478 (trainer:762) INFO: 42epoch:train:1001-1100batch: iter_time=8.417e-05, forward_time=0.337, loss_ctc=39.960, loss_att=38.206, acc=0.774, loss=38.732, backward_time=0.300, grad_norm=39.844, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.561e-04, train_time=1.323 +[gpub010:0/16] 2024-02-11 03:11:15,513 (trainer:762) INFO: 42epoch:train:1101-1200batch: iter_time=8.882e-05, forward_time=0.307, loss_ctc=48.728, loss_att=49.477, acc=0.750, loss=49.252, backward_time=0.299, grad_norm=55.643, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.560e-04, train_time=1.240 +[gpub010:0/16] 2024-02-11 03:12:37,892 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub010:0/16] 2024-02-11 03:12:56,981 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 03:13:00,765 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 03:13:00,765 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-02-11 03:13:00,768 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 03:18:43,523 (trainer:762) INFO: 
42epoch:train:1201-1300batch: iter_time=3.233, forward_time=0.346, loss_ctc=51.192, loss_att=43.441, acc=0.759, loss=45.766, backward_time=0.303, grad_norm=47.487, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.560e-04, train_time=4.480 +[gpub010:0/16] 2024-02-11 03:20:43,686 (trainer:762) INFO: 42epoch:train:1301-1400batch: iter_time=7.756e-05, forward_time=0.292, loss_ctc=48.970, loss_att=41.152, acc=0.768, loss=43.497, backward_time=0.296, grad_norm=45.512, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.560e-04, train_time=1.201 +[gpub010:0/16] 2024-02-11 03:22:52,033 (trainer:762) INFO: 42epoch:train:1401-1500batch: iter_time=8.211e-05, forward_time=0.303, loss_ctc=47.151, loss_att=52.923, acc=0.762, loss=51.191, backward_time=0.309, grad_norm=42.886, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.560e-04, train_time=1.283 +[gpub010:0/16] 2024-02-11 03:25:02,225 (trainer:762) INFO: 42epoch:train:1501-1600batch: iter_time=8.445e-05, forward_time=0.367, loss_ctc=42.021, loss_att=45.394, acc=0.769, loss=44.382, backward_time=0.321, grad_norm=40.097, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.560e-04, train_time=1.301 +[gpub010:0/16] 2024-02-11 03:27:01,932 (trainer:762) INFO: 42epoch:train:1601-1700batch: iter_time=8.498e-05, forward_time=0.296, loss_ctc=44.888, loss_att=49.039, acc=0.751, loss=47.794, backward_time=0.304, grad_norm=44.513, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.560e-04, train_time=1.197 +[gpub010:0/16] 2024-02-11 03:29:16,554 (trainer:762) INFO: 42epoch:train:1701-1800batch: iter_time=8.540e-05, forward_time=0.307, loss_ctc=45.215, loss_att=48.510, acc=0.747, loss=47.522, backward_time=0.300, grad_norm=41.629, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.560e-04, train_time=1.346 +[gpub010:0/16] 2024-02-11 03:31:20,429 (trainer:762) INFO: 42epoch:train:1801-1900batch: 
iter_time=8.687e-05, forward_time=0.331, loss_ctc=41.624, loss_att=42.568, acc=0.798, loss=42.285, backward_time=0.335, grad_norm=37.431, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.100, optim0_lr0=1.560e-04, train_time=1.239 +[gpub010:0/16] 2024-02-11 03:33:22,624 (trainer:762) INFO: 42epoch:train:1901-2000batch: iter_time=8.936e-05, forward_time=0.292, loss_ctc=41.095, loss_att=37.136, acc=0.775, loss=38.324, backward_time=0.299, grad_norm=39.318, clip=100.000, loss_scale=4.725e+33, optim_step_time=0.093, optim0_lr0=1.559e-04, train_time=1.222 +[gpub010:0/16] 2024-02-11 03:35:30,488 (trainer:762) INFO: 42epoch:train:2001-2100batch: iter_time=8.591e-05, forward_time=0.317, loss_ctc=49.682, loss_att=46.817, acc=0.764, loss=47.677, backward_time=0.307, grad_norm=46.150, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.559e-04, train_time=1.279 +[gpub010:0/16] 2024-02-11 03:37:40,549 (trainer:762) INFO: 42epoch:train:2101-2200batch: iter_time=8.035e-05, forward_time=0.293, loss_ctc=44.471, loss_att=46.130, acc=0.752, loss=45.632, backward_time=0.294, grad_norm=41.521, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.559e-04, train_time=1.301 +[gpub010:0/16] 2024-02-11 03:39:51,140 (trainer:762) INFO: 42epoch:train:2201-2300batch: iter_time=8.357e-05, forward_time=0.354, loss_ctc=40.292, loss_att=40.981, acc=0.776, loss=40.775, backward_time=0.306, grad_norm=39.507, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.559e-04, train_time=1.306 +[gpub010:0/16] 2024-02-11 03:41:57,860 (trainer:762) INFO: 42epoch:train:2301-2400batch: iter_time=9.241e-05, forward_time=0.302, loss_ctc=42.379, loss_att=42.500, acc=0.768, loss=42.464, backward_time=0.319, grad_norm=43.345, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.559e-04, train_time=1.267 +[gpub010:0/16] 2024-02-11 03:44:04,902 (trainer:762) INFO: 42epoch:train:2401-2500batch: iter_time=8.448e-05, forward_time=0.292, 
loss_ctc=51.180, loss_att=45.769, acc=0.769, loss=47.392, backward_time=0.296, grad_norm=46.857, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.559e-04, train_time=1.270 +[gpub010:0/16] 2024-02-11 03:44:25,001 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub010:0/16] 2024-02-11 03:44:44,029 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 03:44:47,536 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 03:44:47,536 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-02-11 03:44:47,600 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 03:51:20,028 (trainer:762) INFO: 42epoch:train:2501-2600batch: iter_time=3.185, forward_time=0.325, loss_ctc=49.205, loss_att=42.128, acc=0.759, loss=44.251, backward_time=0.301, grad_norm=46.789, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.559e-04, train_time=4.351 +[gpub010:0/16] 2024-02-11 03:53:26,309 (trainer:762) INFO: 42epoch:train:2601-2700batch: iter_time=8.243e-05, forward_time=0.315, loss_ctc=47.777, loss_att=50.194, acc=0.746, loss=49.469, backward_time=0.303, grad_norm=48.858, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.559e-04, train_time=1.263 +[gpub010:0/16] 2024-02-11 03:55:35,577 (trainer:762) INFO: 
42epoch:train:2701-2800batch: iter_time=8.294e-05, forward_time=0.292, loss_ctc=44.218, loss_att=44.519, acc=0.776, loss=44.429, backward_time=0.297, grad_norm=40.413, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.558e-04, train_time=1.292 +[gpub010:0/16] 2024-02-11 03:57:40,795 (trainer:762) INFO: 42epoch:train:2801-2900batch: iter_time=3.083e-04, forward_time=0.366, loss_ctc=46.653, loss_att=50.416, acc=0.749, loss=49.287, backward_time=0.314, grad_norm=45.420, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.558e-04, train_time=1.252 +[gpub010:0/16] 2024-02-11 03:59:47,224 (trainer:762) INFO: 42epoch:train:2901-3000batch: iter_time=8.432e-05, forward_time=0.306, loss_ctc=46.383, loss_att=51.283, acc=0.738, loss=49.813, backward_time=0.320, grad_norm=42.252, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.558e-04, train_time=1.264 +[gpub010:0/16] 2024-02-11 04:01:55,914 (trainer:762) INFO: 42epoch:train:3001-3100batch: iter_time=8.559e-05, forward_time=0.325, loss_ctc=42.667, loss_att=45.776, acc=0.762, loss=44.843, backward_time=0.294, grad_norm=39.557, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.558e-04, train_time=1.286 +[gpub010:0/16] 2024-02-11 04:04:00,437 (trainer:762) INFO: 42epoch:train:3101-3200batch: iter_time=8.702e-05, forward_time=0.321, loss_ctc=34.550, loss_att=29.720, acc=0.802, loss=31.169, backward_time=0.316, grad_norm=33.823, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.558e-04, train_time=1.245 +[gpub010:0/16] 2024-02-11 04:06:11,351 (trainer:762) INFO: 42epoch:train:3201-3300batch: iter_time=8.947e-05, forward_time=0.311, loss_ctc=49.700, loss_att=47.598, acc=0.760, loss=48.228, backward_time=0.317, grad_norm=45.046, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.558e-04, train_time=1.309 +[gpub010:0/16] 2024-02-11 04:08:01,436 (trainer:762) INFO: 42epoch:train:3301-3400batch: 
iter_time=8.307e-05, forward_time=0.291, loss_ctc=50.695, loss_att=49.884, acc=0.740, loss=50.127, backward_time=0.297, grad_norm=48.657, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.558e-04, train_time=1.101 +[gpub010:0/16] 2024-02-11 04:10:19,818 (trainer:762) INFO: 42epoch:train:3401-3500batch: iter_time=9.563e-05, forward_time=0.323, loss_ctc=37.926, loss_att=37.779, acc=0.773, loss=37.823, backward_time=0.333, grad_norm=39.266, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.558e-04, train_time=1.384 +[gpub010:0/16] 2024-02-11 04:12:20,507 (trainer:762) INFO: 42epoch:train:3501-3600batch: iter_time=8.939e-05, forward_time=0.289, loss_ctc=39.495, loss_att=37.880, acc=0.777, loss=38.365, backward_time=0.294, grad_norm=39.211, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.557e-04, train_time=1.206 +[gpub010:0/16] 2024-02-11 04:14:19,756 (trainer:762) INFO: 42epoch:train:3601-3700batch: iter_time=9.394e-05, forward_time=0.304, loss_ctc=47.380, loss_att=48.540, acc=0.753, loss=48.192, backward_time=0.317, grad_norm=50.563, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.557e-04, train_time=1.193 +[gpub010:0/16] 2024-02-11 04:15:33,721 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-02-11 04:15:52,813 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 04:15:56,340 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 04:15:56,340 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-02-11 04:15:56,359 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 04:21:45,916 (trainer:762) INFO: 42epoch:train:3701-3800batch: iter_time=3.241, forward_time=0.344, loss_ctc=49.291, loss_att=42.052, acc=0.766, loss=44.223, backward_time=0.306, grad_norm=46.508, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.557e-04, train_time=4.461 +[gpub010:0/16] 2024-02-11 04:23:46,830 (trainer:762) INFO: 42epoch:train:3801-3900batch: iter_time=8.732e-05, forward_time=0.317, loss_ctc=48.113, loss_att=40.915, acc=0.773, loss=43.074, backward_time=0.301, grad_norm=42.324, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.557e-04, train_time=1.208 +[gpub010:0/16] 2024-02-11 04:26:03,676 (trainer:762) INFO: 42epoch:train:3901-4000batch: iter_time=8.250e-05, forward_time=0.294, loss_ctc=46.197, loss_att=52.586, acc=0.765, loss=50.669, backward_time=0.299, grad_norm=42.762, clip=100.000, loss_scale=9.450e+33, optim_step_time=0.093, optim0_lr0=1.557e-04, train_time=1.369 +[gpub010:0/16] 2024-02-11 04:28:06,306 (trainer:762) INFO: 
42epoch:train:4001-4100batch: iter_time=8.376e-05, forward_time=0.327, loss_ctc=41.915, loss_att=45.119, acc=0.772, loss=44.158, backward_time=0.346, grad_norm=38.663, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.557e-04, train_time=1.226 +[gpub010:0/16] 2024-02-11 04:30:09,221 (trainer:762) INFO: 42epoch:train:4101-4200batch: iter_time=8.087e-05, forward_time=0.294, loss_ctc=44.360, loss_att=48.660, acc=0.755, loss=47.370, backward_time=0.298, grad_norm=43.623, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.557e-04, train_time=1.229 +[gpub010:0/16] 2024-02-11 04:32:28,571 (trainer:762) INFO: 42epoch:train:4201-4300batch: iter_time=8.293e-05, forward_time=0.311, loss_ctc=44.750, loss_att=48.182, acc=0.751, loss=47.152, backward_time=0.298, grad_norm=40.771, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.557e-04, train_time=1.392 +[gpub010:0/16] 2024-02-11 04:34:19,733 (trainer:762) INFO: 42epoch:train:4301-4400batch: iter_time=8.100e-05, forward_time=0.294, loss_ctc=41.547, loss_att=42.701, acc=0.800, loss=42.355, backward_time=0.300, grad_norm=35.804, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.556e-04, train_time=1.113 +[gpub010:0/16] 2024-02-11 04:36:37,663 (trainer:762) INFO: 42epoch:train:4401-4500batch: iter_time=5.485e-04, forward_time=0.339, loss_ctc=41.069, loss_att=37.180, acc=0.777, loss=38.346, backward_time=0.338, grad_norm=40.610, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.556e-04, train_time=1.379 +[gpub010:0/16] 2024-02-11 04:38:48,497 (trainer:762) INFO: 42epoch:train:4501-4600batch: iter_time=8.402e-05, forward_time=0.290, loss_ctc=48.961, loss_att=45.912, acc=0.766, loss=46.827, backward_time=0.296, grad_norm=45.106, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.556e-04, train_time=1.308 +[gpub010:0/16] 2024-02-11 04:40:50,313 (trainer:762) INFO: 42epoch:train:4601-4700batch: 
iter_time=8.635e-05, forward_time=0.313, loss_ctc=43.750, loss_att=46.043, acc=0.755, loss=45.355, backward_time=0.309, grad_norm=41.169, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.556e-04, train_time=1.217 +[gpub010:0/16] 2024-02-11 04:42:18,223 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 04:43:17,144 (trainer:762) INFO: 42epoch:train:4701-4800batch: iter_time=8.091e-05, forward_time=0.370, loss_ctc=40.369, loss_att=41.159, acc=0.775, loss=40.922, backward_time=0.314, grad_norm=39.008, clip=100.000, loss_scale=8.182e+33, optim_step_time=0.096, optim0_lr0=1.556e-04, train_time=1.469 +[gpub010:0/16] 2024-02-11 04:45:14,184 (trainer:762) INFO: 42epoch:train:4801-4900batch: iter_time=7.948e-05, forward_time=0.291, loss_ctc=41.546, loss_att=41.892, acc=0.772, loss=41.788, backward_time=0.296, grad_norm=43.341, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.556e-04, train_time=1.170 +[gpub010:0/16] 2024-02-11 04:47:39,119 (trainer:762) INFO: 42epoch:train:4901-5000batch: iter_time=7.799e-05, forward_time=0.304, loss_ctc=50.367, loss_att=45.149, acc=0.770, loss=46.714, backward_time=0.309, grad_norm=46.946, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.556e-04, train_time=1.449 +[gpub010:0/16] 2024-02-11 04:47:59,149 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-02-11 04:48:18,084 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 04:48:21,588 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 04:48:21,588 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-02-11 04:48:21,625 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 04:55:04,851 (trainer:762) INFO: 42epoch:train:5001-5100batch: iter_time=3.305, forward_time=0.337, loss_ctc=49.484, loss_att=41.131, acc=0.769, loss=43.637, backward_time=0.303, grad_norm=47.107, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.556e-04, train_time=4.457 +[gpub010:0/16] 2024-02-11 04:57:10,456 (trainer:762) INFO: 42epoch:train:5101-5200batch: iter_time=7.956e-05, forward_time=0.292, loss_ctc=46.950, loss_att=51.058, acc=0.751, loss=49.825, backward_time=0.298, grad_norm=46.099, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.555e-04, train_time=1.256 +[gpub010:0/16] 2024-02-11 04:59:36,225 (trainer:762) INFO: 42epoch:train:5201-5300batch: iter_time=7.806e-05, forward_time=0.375, loss_ctc=43.880, loss_att=43.683, acc=0.786, loss=43.742, backward_time=0.326, grad_norm=40.582, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.555e-04, train_time=1.457 +[gpub010:0/16] 2024-02-11 05:01:29,164 (trainer:762) INFO: 
42epoch:train:5301-5400batch: iter_time=7.960e-05, forward_time=0.293, loss_ctc=46.075, loss_att=50.092, acc=0.765, loss=48.887, backward_time=0.299, grad_norm=44.731, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.555e-04, train_time=1.129 +[gpub010:0/16] 2024-02-11 05:03:35,133 (trainer:762) INFO: 42epoch:train:5401-5500batch: iter_time=8.054e-05, forward_time=0.316, loss_ctc=46.238, loss_att=52.396, acc=0.743, loss=50.549, backward_time=0.308, grad_norm=41.959, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.555e-04, train_time=1.260 +[gpub010:0/16] 2024-02-11 05:05:38,847 (trainer:762) INFO: 42epoch:train:5501-5600batch: iter_time=2.459e-04, forward_time=0.329, loss_ctc=42.086, loss_att=46.765, acc=0.766, loss=45.361, backward_time=0.326, grad_norm=39.032, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.555e-04, train_time=1.236 +[gpub010:0/16] 2024-02-11 05:07:47,077 (trainer:762) INFO: 42epoch:train:5601-5700batch: iter_time=8.277e-05, forward_time=0.290, loss_ctc=34.663, loss_att=29.760, acc=0.805, loss=31.231, backward_time=0.293, grad_norm=33.840, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.555e-04, train_time=1.283 +[gpub010:0/16] 2024-02-11 05:09:59,878 (trainer:762) INFO: 42epoch:train:5701-5800batch: iter_time=8.219e-05, forward_time=0.306, loss_ctc=49.123, loss_att=46.509, acc=0.774, loss=47.293, backward_time=0.317, grad_norm=45.226, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.555e-04, train_time=1.328 +[gpub010:0/16] 2024-02-11 05:12:01,835 (trainer:762) INFO: 42epoch:train:5801-5900batch: iter_time=2.047e-04, forward_time=0.358, loss_ctc=49.686, loss_att=49.997, acc=0.742, loss=49.904, backward_time=0.323, grad_norm=47.300, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.555e-04, train_time=1.219 +[gpub010:0/16] 2024-02-11 05:14:05,038 (trainer:762) INFO: 42epoch:train:5901-6000batch: 
iter_time=8.616e-05, forward_time=0.290, loss_ctc=37.468, loss_att=37.237, acc=0.786, loss=37.306, backward_time=0.294, grad_norm=37.125, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.554e-04, train_time=1.232 +[gpub010:0/16] 2024-02-11 05:16:13,814 (trainer:762) INFO: 42epoch:train:6001-6100batch: iter_time=9.314e-05, forward_time=0.300, loss_ctc=39.507, loss_att=38.095, acc=0.780, loss=38.518, backward_time=0.311, grad_norm=40.173, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.554e-04, train_time=1.288 +[gpub010:0/16] 2024-02-11 05:18:13,574 (trainer:762) INFO: 42epoch:train:6101-6200batch: iter_time=2.558e-04, forward_time=0.344, loss_ctc=46.923, loss_att=48.418, acc=0.759, loss=47.970, backward_time=0.328, grad_norm=48.594, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.554e-04, train_time=1.197 +[gpub010:0/16] 2024-02-11 05:19:27,765 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub010:0/16] 2024-02-11 05:19:47,320 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 05:19:50,909 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 05:19:50,909 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-02-11 05:19:50,914 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 05:24:51,756 
(trainer:762) INFO: 42epoch:train:6201-6300batch: iter_time=2.836, forward_time=0.300, loss_ctc=49.705, loss_att=42.853, acc=0.761, loss=44.908, backward_time=0.297, grad_norm=45.960, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.554e-04, train_time=3.982 +[gpub010:0/16] 2024-02-11 05:27:06,351 (trainer:762) INFO: 42epoch:train:6301-6400batch: iter_time=8.228e-05, forward_time=0.341, loss_ctc=48.421, loss_att=41.899, acc=0.766, loss=43.856, backward_time=0.336, grad_norm=45.536, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.554e-04, train_time=1.346 +[gpub010:0/16] 2024-02-11 05:29:00,583 (trainer:762) INFO: 42epoch:train:6401-6500batch: iter_time=8.160e-05, forward_time=0.293, loss_ctc=46.482, loss_att=51.548, acc=0.760, loss=50.028, backward_time=0.300, grad_norm=45.632, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.554e-04, train_time=1.142 +[gpub010:0/16] 2024-02-11 05:31:15,952 (trainer:762) INFO: 42epoch:train:6501-6600batch: iter_time=8.499e-05, forward_time=0.303, loss_ctc=41.496, loss_att=45.187, acc=0.768, loss=44.080, backward_time=0.308, grad_norm=40.406, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.554e-04, train_time=1.353 +[gpub010:0/16] 2024-02-11 05:33:16,879 (trainer:762) INFO: 42epoch:train:6601-6700batch: iter_time=8.320e-05, forward_time=0.353, loss_ctc=43.837, loss_att=47.683, acc=0.751, loss=46.529, backward_time=0.316, grad_norm=46.509, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.554e-04, train_time=1.209 +[gpub010:0/16] 2024-02-11 05:35:30,389 (trainer:762) INFO: 42epoch:train:6701-6800batch: iter_time=8.088e-05, forward_time=0.302, loss_ctc=44.214, loss_att=47.995, acc=0.742, loss=46.861, backward_time=0.296, grad_norm=41.079, clip=100.000, loss_scale=7.373e+33, optim_step_time=0.094, optim0_lr0=1.553e-04, train_time=1.335 +[gpub010:0/16] 2024-02-11 05:37:23,030 (trainer:762) INFO: 
42epoch:train:6801-6900batch: iter_time=8.060e-05, forward_time=0.302, loss_ctc=41.111, loss_att=42.107, acc=0.799, loss=41.808, backward_time=0.304, grad_norm=38.131, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.553e-04, train_time=1.127 +[gpub010:0/16] 2024-02-11 05:39:36,790 (trainer:762) INFO: 42epoch:train:6901-7000batch: iter_time=8.056e-05, forward_time=0.343, loss_ctc=40.584, loss_att=36.992, acc=0.775, loss=38.070, backward_time=0.337, grad_norm=41.373, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.553e-04, train_time=1.337 +[gpub010:0/16] 2024-02-11 05:41:43,260 (trainer:762) INFO: 42epoch:train:7001-7100batch: iter_time=7.913e-05, forward_time=0.296, loss_ctc=48.599, loss_att=45.668, acc=0.762, loss=46.547, backward_time=0.305, grad_norm=44.498, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.553e-04, train_time=1.265 +[gpub010:0/16] 2024-02-11 05:43:45,445 (trainer:762) INFO: 42epoch:train:7101-7200batch: iter_time=7.876e-05, forward_time=0.290, loss_ctc=43.681, loss_att=45.981, acc=0.748, loss=45.291, backward_time=0.295, grad_norm=42.485, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.553e-04, train_time=1.222 +[gpub010:0/16] 2024-02-11 05:46:07,864 (trainer:762) INFO: 42epoch:train:7201-7300batch: iter_time=1.979e-04, forward_time=0.331, loss_ctc=39.949, loss_att=41.565, acc=0.767, loss=41.080, backward_time=0.329, grad_norm=39.206, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.553e-04, train_time=1.424 +[gpub010:0/16] 2024-02-11 05:48:12,569 (trainer:762) INFO: 42epoch:train:7301-7400batch: iter_time=7.602e-05, forward_time=0.298, loss_ctc=41.923, loss_att=40.998, acc=0.776, loss=41.275, backward_time=0.307, grad_norm=43.602, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.553e-04, train_time=1.246 +[gpub010:0/16] 2024-02-11 05:49:57,678 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub010:0/16] 2024-02-11 05:50:21,467 (trainer:762) INFO: 42epoch:train:7401-7500batch: iter_time=7.452e-05, forward_time=0.295, loss_ctc=50.276, loss_att=44.425, acc=0.769, loss=46.180, backward_time=0.298, grad_norm=45.691, clip=100.000, loss_scale=9.283e+33, optim_step_time=0.093, optim0_lr0=1.553e-04, train_time=1.289 +[gpub010:0/16] 2024-02-11 05:50:41,517 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub010:0/16] 2024-02-11 05:51:00,743 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 05:51:04,535 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 05:51:04,535 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-02-11 05:51:04,539 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 05:57:41,673 (trainer:762) INFO: 42epoch:train:7501-7600batch: iter_time=3.215, forward_time=0.360, loss_ctc=48.410, loss_att=42.548, acc=0.768, loss=44.307, backward_time=0.305, grad_norm=46.542, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.552e-04, train_time=4.402 +[gpub010:0/16] 2024-02-11 05:59:36,016 (trainer:762) INFO: 42epoch:train:7601-7700batch: iter_time=7.929e-05, forward_time=0.295, loss_ctc=46.803, loss_att=51.277, acc=0.751, loss=49.935, backward_time=0.311, grad_norm=44.120, clip=100.000, 
loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.552e-04, train_time=1.143 +[gpub010:0/16] 2024-02-11 06:01:05,960 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 06:02:01,400 (trainer:762) INFO: 42epoch:train:7701-7800batch: iter_time=8.301e-05, forward_time=0.370, loss_ctc=43.674, loss_att=44.064, acc=0.786, loss=43.947, backward_time=0.316, grad_norm=40.280, clip=100.000, loss_scale=4.196e+33, optim_step_time=0.110, optim0_lr0=1.552e-04, train_time=1.454 +[gpub010:0/16] 2024-02-11 06:04:05,221 (trainer:762) INFO: 42epoch:train:7801-7900batch: iter_time=8.108e-05, forward_time=0.294, loss_ctc=45.849, loss_att=49.981, acc=0.764, loss=48.742, backward_time=0.299, grad_norm=45.696, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.552e-04, train_time=1.238 +[gpub010:0/16] 2024-02-11 06:06:11,443 (trainer:762) INFO: 42epoch:train:7901-8000batch: iter_time=8.316e-05, forward_time=0.299, loss_ctc=46.312, loss_att=52.562, acc=0.743, loss=50.687, backward_time=0.307, grad_norm=42.625, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.552e-04, train_time=1.262 +[gpub010:0/16] 2024-02-11 06:08:25,498 (trainer:762) INFO: 42epoch:train:8001-8100batch: iter_time=8.532e-05, forward_time=0.368, loss_ctc=42.183, loss_att=46.741, acc=0.768, loss=45.374, backward_time=0.334, grad_norm=41.502, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.552e-04, train_time=1.340 +[gpub010:0/16] 2024-02-11 06:10:35,873 (trainer:762) INFO: 42epoch:train:8101-8200batch: iter_time=8.269e-05, forward_time=0.290, loss_ctc=34.234, loss_att=29.560, acc=0.805, loss=30.962, backward_time=0.294, grad_norm=32.657, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.552e-04, train_time=1.304 +[gpub010:0/16] 2024-02-11 06:12:35,121 (trainer:762) INFO: 42epoch:train:8201-8300batch: iter_time=8.328e-05, forward_time=0.299, loss_ctc=49.012, loss_att=46.333, 
acc=0.774, loss=47.137, backward_time=0.302, grad_norm=45.920, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.552e-04, train_time=1.192 +[gpub010:0/16] 2024-02-11 06:15:08,797 (trainer:762) INFO: 42epoch:train:8301-8400batch: iter_time=8.762e-05, forward_time=0.400, loss_ctc=49.613, loss_att=50.942, acc=0.741, loss=50.543, backward_time=0.326, grad_norm=48.122, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.551e-04, train_time=1.537 +[gpub010:0/16] 2024-02-11 06:16:58,982 (trainer:762) INFO: 42epoch:train:8401-8500batch: iter_time=8.822e-05, forward_time=0.295, loss_ctc=37.470, loss_att=37.232, acc=0.787, loss=37.303, backward_time=0.304, grad_norm=37.752, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.551e-04, train_time=1.102 +[gpub010:0/16] 2024-02-11 06:19:12,166 (trainer:762) INFO: 42epoch:train:8501-8600batch: iter_time=8.885e-05, forward_time=0.292, loss_ctc=39.233, loss_att=37.952, acc=0.782, loss=38.337, backward_time=0.297, grad_norm=39.874, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.551e-04, train_time=1.330 +[gpub010:0/16] 2024-02-11 06:21:30,211 (trainer:762) INFO: 42epoch:train:8601-8700batch: iter_time=9.088e-05, forward_time=0.344, loss_ctc=46.425, loss_att=48.240, acc=0.759, loss=47.696, backward_time=0.335, grad_norm=49.762, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.551e-04, train_time=1.382 +[gpub010:0/16] 2024-02-11 06:22:47,070 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub010:0/16] 2024-02-11 06:23:06,342 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 06:23:10,104 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 06:23:10,105 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-02-11 06:23:10,108 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 06:28:47,593 (trainer:762) INFO: 42epoch:train:8701-8800batch: iter_time=3.095, forward_time=0.298, loss_ctc=49.195, loss_att=42.239, acc=0.763, loss=44.325, backward_time=0.296, grad_norm=44.998, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.551e-04, train_time=4.374 +[gpub010:0/16] 2024-02-11 06:31:06,526 (trainer:762) INFO: 42epoch:train:8801-8900batch: iter_time=8.241e-05, forward_time=0.353, loss_ctc=47.899, loss_att=40.821, acc=0.770, loss=42.944, backward_time=0.315, grad_norm=45.185, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.551e-04, train_time=1.389 +[gpub010:0/16] 2024-02-11 06:32:58,061 (trainer:762) INFO: 42epoch:train:8901-9000batch: iter_time=8.349e-05, forward_time=0.295, loss_ctc=45.751, loss_att=50.194, acc=0.763, loss=48.861, backward_time=0.300, grad_norm=42.858, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.551e-04, train_time=1.115 +[gpub010:0/16] 2024-02-11 06:34:59,404 (trainer:762) INFO: 
42epoch:train:9001-9100batch: iter_time=8.162e-05, forward_time=0.301, loss_ctc=41.264, loss_att=44.689, acc=0.768, loss=43.661, backward_time=0.299, grad_norm=41.389, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.551e-04, train_time=1.214 +[gpub010:0/16] 2024-02-11 06:37:14,096 (trainer:762) INFO: 42epoch:train:9101-9200batch: iter_time=8.253e-05, forward_time=0.291, loss_ctc=43.569, loss_att=47.148, acc=0.753, loss=46.074, backward_time=0.296, grad_norm=44.175, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.550e-04, train_time=1.347 +[gpub010:0/16] 2024-02-11 06:39:25,949 (trainer:762) INFO: 42epoch:train:9201-9300batch: iter_time=8.378e-05, forward_time=0.397, loss_ctc=44.426, loss_att=47.966, acc=0.742, loss=46.904, backward_time=0.312, grad_norm=42.337, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.550e-04, train_time=1.318 +[gpub010:0/16] 2024-02-11 06:41:50,028 (trainer:762) INFO: 42epoch:train:9301-9400batch: iter_time=8.283e-05, forward_time=0.299, loss_ctc=41.096, loss_att=41.755, acc=0.800, loss=41.557, backward_time=0.301, grad_norm=35.569, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.550e-04, train_time=1.440 +[gpub010:0/16] 2024-02-11 06:43:40,929 (trainer:762) INFO: 42epoch:train:9401-9500batch: iter_time=8.540e-05, forward_time=0.290, loss_ctc=40.374, loss_att=37.104, acc=0.776, loss=38.085, backward_time=0.296, grad_norm=42.572, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.550e-04, train_time=1.110 +[gpub010:0/16] 2024-02-11 06:45:52,293 (trainer:762) INFO: 42epoch:train:9501-9600batch: iter_time=8.351e-05, forward_time=0.365, loss_ctc=48.441, loss_att=45.259, acc=0.765, loss=46.214, backward_time=0.310, grad_norm=44.214, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.550e-04, train_time=1.313 +[gpub010:0/16] 2024-02-11 06:48:13,076 (trainer:762) INFO: 42epoch:train:9601-9700batch: 
iter_time=8.158e-05, forward_time=0.297, loss_ctc=43.404, loss_att=45.322, acc=0.751, loss=44.747, backward_time=0.299, grad_norm=44.128, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.550e-04, train_time=1.407 +[gpub010:0/16] 2024-02-11 06:50:14,393 (trainer:762) INFO: 42epoch:train:9701-9800batch: iter_time=1.003e-04, forward_time=0.291, loss_ctc=39.867, loss_att=40.924, acc=0.770, loss=40.607, backward_time=0.295, grad_norm=40.776, clip=100.000, loss_scale=3.583e+33, optim_step_time=0.094, optim0_lr0=1.550e-04, train_time=1.213 +[gpub010:0/16] 2024-02-11 06:52:16,372 (trainer:762) INFO: 42epoch:train:9801-9900batch: iter_time=8.008e-05, forward_time=0.290, loss_ctc=41.044, loss_att=40.621, acc=0.778, loss=40.748, backward_time=0.295, grad_norm=43.900, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.550e-04, train_time=1.220 +[gpub010:0/16] 2024-02-11 06:54:40,359 (trainer:762) INFO: 42epoch:train:9901-10000batch: iter_time=8.256e-05, forward_time=0.408, loss_ctc=49.397, loss_att=44.200, acc=0.769, loss=45.759, backward_time=0.312, grad_norm=46.661, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.114, optim0_lr0=1.549e-04, train_time=1.439 +[gpub010:0/16] 2024-02-11 06:55:00,387 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub010:0/16] 2024-02-11 06:55:19,588 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 06:55:23,142 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 06:55:23,142 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-02-11 06:55:23,187 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 07:01:34,296 (trainer:762) INFO: 42epoch:train:10001-10100batch: iter_time=2.960, forward_time=0.372, loss_ctc=48.058, loss_att=41.876, acc=0.769, loss=43.731, backward_time=0.308, grad_norm=45.292, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.549e-04, train_time=4.139 +[gpub010:0/16] 2024-02-11 07:03:47,641 (trainer:762) INFO: 42epoch:train:10101-10200batch: iter_time=9.043e-05, forward_time=0.292, loss_ctc=46.409, loss_att=51.173, acc=0.752, loss=49.744, backward_time=0.296, grad_norm=45.531, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.549e-04, train_time=1.333 +[gpub010:0/16] 2024-02-11 07:06:03,958 (trainer:762) INFO: 42epoch:train:10201-10300batch: iter_time=9.577e-05, forward_time=0.389, loss_ctc=43.332, loss_att=43.887, acc=0.787, loss=43.720, backward_time=0.314, grad_norm=40.342, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.549e-04, train_time=1.363 +[gpub010:0/16] 2024-02-11 07:08:14,179 (trainer:762) 
INFO: 42epoch:train:10301-10400batch: iter_time=8.928e-05, forward_time=0.295, loss_ctc=45.559, loss_att=50.332, acc=0.765, loss=48.900, backward_time=0.299, grad_norm=44.836, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.549e-04, train_time=1.302 +[gpub010:0/16] 2024-02-11 07:10:18,435 (trainer:762) INFO: 42epoch:train:10401-10500batch: iter_time=8.901e-05, forward_time=0.373, loss_ctc=46.297, loss_att=52.218, acc=0.743, loss=50.441, backward_time=0.337, grad_norm=43.676, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.549e-04, train_time=1.242 +[gpub010:0/16] 2024-02-11 07:12:35,080 (trainer:762) INFO: 42epoch:train:10501-10600batch: iter_time=9.574e-05, forward_time=0.301, loss_ctc=41.768, loss_att=46.147, acc=0.769, loss=44.833, backward_time=0.301, grad_norm=41.149, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.549e-04, train_time=1.366 +[gpub010:0/16] 2024-02-11 07:14:37,335 (trainer:762) INFO: 42epoch:train:10601-10700batch: iter_time=8.656e-05, forward_time=0.294, loss_ctc=33.884, loss_att=29.326, acc=0.806, loss=30.694, backward_time=0.304, grad_norm=31.487, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.549e-04, train_time=1.222 +[gpub010:0/16] 2024-02-11 07:16:52,273 (trainer:762) INFO: 42epoch:train:10701-10800batch: iter_time=8.846e-05, forward_time=0.335, loss_ctc=48.838, loss_att=46.716, acc=0.773, loss=47.353, backward_time=0.334, grad_norm=43.846, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.548e-04, train_time=1.349 +[gpub010:0/16] 2024-02-11 07:19:00,692 (trainer:762) INFO: 42epoch:train:10801-10900batch: iter_time=8.598e-05, forward_time=0.306, loss_ctc=49.117, loss_att=50.024, acc=0.743, loss=49.752, backward_time=0.301, grad_norm=48.114, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.548e-04, train_time=1.284 +[gpub010:0/16] 2024-02-11 07:21:14,239 (trainer:762) INFO: 
42epoch:train:10901-11000batch: iter_time=9.952e-05, forward_time=0.367, loss_ctc=37.245, loss_att=37.388, acc=0.787, loss=37.345, backward_time=0.324, grad_norm=35.528, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.548e-04, train_time=1.335 +[gpub010:0/16] 2024-02-11 07:23:16,235 (trainer:762) INFO: 42epoch:train:11001-11100batch: iter_time=8.587e-05, forward_time=0.289, loss_ctc=38.943, loss_att=37.486, acc=0.783, loss=37.923, backward_time=0.294, grad_norm=37.268, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.548e-04, train_time=1.220 +[gpub010:0/16] 2024-02-11 07:25:35,555 (trainer:762) INFO: 42epoch:train:11101-11200batch: iter_time=8.667e-05, forward_time=0.307, loss_ctc=46.463, loss_att=48.492, acc=0.760, loss=47.883, backward_time=0.307, grad_norm=50.647, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.548e-04, train_time=1.393 +[gpub010:0/16] 2024-02-11 07:27:05,374 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-02-11 07:27:24,816 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 07:27:28,333 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 07:27:28,333 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-02-11 07:27:28,346 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 07:33:05,852 (trainer:762) INFO: 42epoch:train:11201-11300batch: iter_time=3.090, forward_time=0.435, loss_ctc=49.628, loss_att=42.224, acc=0.763, loss=44.445, backward_time=0.314, grad_norm=46.950, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.548e-04, train_time=4.503 +[gpub010:0/16] 2024-02-11 07:35:15,215 (trainer:762) INFO: 42epoch:train:11301-11400batch: iter_time=7.898e-05, forward_time=0.329, loss_ctc=47.632, loss_att=40.497, acc=0.771, loss=42.638, backward_time=0.351, grad_norm=46.409, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.548e-04, train_time=1.293 +[gpub010:0/16] 2024-02-11 07:37:40,597 (trainer:762) INFO: 42epoch:train:11401-11500batch: iter_time=7.855e-05, forward_time=0.312, loss_ctc=46.106, loss_att=50.813, acc=0.762, loss=49.400, backward_time=0.301, grad_norm=43.231, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.548e-04, train_time=1.454 +[gpub010:0/16] 2024-02-11 07:39:39,830 (trainer:762) 
INFO: 42epoch:train:11501-11600batch: iter_time=8.264e-05, forward_time=0.373, loss_ctc=40.676, loss_att=44.726, acc=0.768, loss=43.511, backward_time=0.309, grad_norm=41.084, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.547e-04, train_time=1.193 +[gpub010:0/16] 2024-02-11 07:41:55,067 (trainer:762) INFO: 42epoch:train:11601-11700batch: iter_time=8.957e-05, forward_time=0.297, loss_ctc=43.551, loss_att=47.193, acc=0.754, loss=46.100, backward_time=0.300, grad_norm=45.360, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.547e-04, train_time=1.351 +[gpub010:0/16] 2024-02-11 07:43:25,835 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 07:43:51,331 (trainer:762) INFO: 42epoch:train:11701-11800batch: iter_time=8.434e-05, forward_time=0.290, loss_ctc=44.364, loss_att=47.982, acc=0.741, loss=46.897, backward_time=0.294, grad_norm=41.136, clip=100.000, loss_scale=6.084e+33, optim_step_time=0.093, optim0_lr0=1.547e-04, train_time=1.163 +[gpub010:0/16] 2024-02-11 07:46:07,045 (trainer:762) INFO: 42epoch:train:11801-11900batch: iter_time=1.010e-04, forward_time=0.396, loss_ctc=40.906, loss_att=41.835, acc=0.800, loss=41.556, backward_time=0.309, grad_norm=36.713, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.547e-04, train_time=1.357 +[gpub010:0/16] 2024-02-11 07:48:22,687 (trainer:762) INFO: 42epoch:train:11901-12000batch: iter_time=8.774e-05, forward_time=0.306, loss_ctc=40.438, loss_att=36.347, acc=0.779, loss=37.574, backward_time=0.295, grad_norm=40.619, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.547e-04, train_time=1.355 +[gpub010:0/16] 2024-02-11 07:50:26,632 (trainer:762) INFO: 42epoch:train:12001-12100batch: iter_time=8.441e-05, forward_time=0.358, loss_ctc=48.195, loss_att=45.097, acc=0.764, loss=46.027, backward_time=0.363, grad_norm=42.681, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, 
optim0_lr0=1.547e-04, train_time=1.241 +[gpub010:0/16] 2024-02-11 07:52:38,629 (trainer:762) INFO: 42epoch:train:12101-12200batch: iter_time=8.538e-05, forward_time=0.300, loss_ctc=43.145, loss_att=44.979, acc=0.752, loss=44.429, backward_time=0.299, grad_norm=42.788, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.547e-04, train_time=1.318 +[gpub010:0/16] 2024-02-11 07:55:01,761 (trainer:762) INFO: 42epoch:train:12201-12300batch: iter_time=8.731e-05, forward_time=0.384, loss_ctc=39.482, loss_att=40.546, acc=0.771, loss=40.227, backward_time=0.306, grad_norm=40.642, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.547e-04, train_time=1.433 +[gpub010:0/16] 2024-02-11 07:56:50,466 (trainer:762) INFO: 42epoch:train:12301-12400batch: iter_time=8.194e-05, forward_time=0.296, loss_ctc=41.335, loss_att=40.576, acc=0.777, loss=40.804, backward_time=0.297, grad_norm=45.060, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.546e-04, train_time=1.086 +[gpub010:0/16] 2024-02-11 07:59:16,063 (trainer:762) INFO: 42epoch:train:12401-12500batch: iter_time=7.984e-05, forward_time=0.318, loss_ctc=50.235, loss_att=44.072, acc=0.769, loss=45.921, backward_time=0.298, grad_norm=45.234, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.546e-04, train_time=1.457 +[gpub010:0/16] 2024-02-11 07:59:36,090 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-02-11 07:59:55,675 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 07:59:59,979 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 08:00:00,009 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-02-11 08:00:00,026 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 08:06:25,753 (trainer:762) INFO: 42epoch:train:12501-12600batch: iter_time=3.115, forward_time=0.345, loss_ctc=48.401, loss_att=40.172, acc=0.766, loss=42.641, backward_time=0.304, grad_norm=46.207, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.546e-04, train_time=4.295 +[gpub010:0/16] 2024-02-11 08:08:54,317 (trainer:762) INFO: 42epoch:train:12601-12700batch: iter_time=8.373e-05, forward_time=0.301, loss_ctc=46.257, loss_att=48.171, acc=0.755, loss=47.597, backward_time=0.299, grad_norm=44.352, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.546e-04, train_time=1.487 +[gpub010:0/16] 2024-02-11 08:10:55,771 (trainer:762) INFO: 42epoch:train:12701-12800batch: iter_time=8.105e-05, forward_time=0.363, loss_ctc=43.675, loss_att=43.884, acc=0.780, loss=43.822, backward_time=0.313, grad_norm=39.208, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.546e-04, train_time=1.213 +[gpub010:0/16] 2024-02-11 08:13:25,027 (trainer:762) 
INFO: 42epoch:train:12801-12900batch: iter_time=4.204e-04, forward_time=0.306, loss_ctc=45.633, loss_att=49.595, acc=0.754, loss=48.406, backward_time=0.297, grad_norm=44.603, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.546e-04, train_time=1.494 +[gpub010:0/16] 2024-02-11 08:15:29,873 (trainer:762) INFO: 42epoch:train:12901-13000batch: iter_time=8.256e-05, forward_time=0.340, loss_ctc=45.713, loss_att=50.318, acc=0.745, loss=48.937, backward_time=0.321, grad_norm=42.261, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.546e-04, train_time=1.247 +[gpub010:0/16] 2024-02-11 08:17:53,004 (trainer:762) INFO: 42epoch:train:13001-13100batch: iter_time=8.087e-05, forward_time=0.290, loss_ctc=41.981, loss_att=45.691, acc=0.763, loss=44.578, backward_time=0.294, grad_norm=39.275, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.546e-04, train_time=1.432 +[gpub010:0/16] 2024-02-11 08:20:07,883 (trainer:762) INFO: 42epoch:train:13101-13200batch: iter_time=4.479e-04, forward_time=0.375, loss_ctc=33.974, loss_att=29.447, acc=0.806, loss=30.805, backward_time=0.308, grad_norm=32.416, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.546e-04, train_time=1.349 +[gpub010:0/16] 2024-02-11 08:21:56,713 (trainer:762) INFO: 42epoch:train:13201-13300batch: iter_time=8.439e-05, forward_time=0.292, loss_ctc=48.340, loss_att=45.890, acc=0.769, loss=46.625, backward_time=0.298, grad_norm=43.949, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.545e-04, train_time=1.087 +[gpub010:0/16] 2024-02-11 08:24:38,975 (trainer:762) INFO: 42epoch:train:13301-13400batch: iter_time=2.736e-04, forward_time=0.358, loss_ctc=49.127, loss_att=49.800, acc=0.743, loss=49.598, backward_time=0.305, grad_norm=47.411, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.545e-04, train_time=1.623 +[gpub010:0/16] 2024-02-11 08:26:26,473 (trainer:762) INFO: 
42epoch:train:13401-13500batch: iter_time=8.902e-05, forward_time=0.288, loss_ctc=37.070, loss_att=36.875, acc=0.779, loss=36.933, backward_time=0.294, grad_norm=37.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.545e-04, train_time=1.075 +[gpub010:0/16] 2024-02-11 08:28:50,435 (trainer:762) INFO: 42epoch:train:13501-13600batch: iter_time=9.414e-05, forward_time=0.314, loss_ctc=38.511, loss_att=38.146, acc=0.778, loss=38.255, backward_time=0.296, grad_norm=37.554, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.545e-04, train_time=1.439 +[gpub010:0/16] 2024-02-11 08:31:09,742 (trainer:762) INFO: 42epoch:train:13601-13700batch: iter_time=8.544e-05, forward_time=0.385, loss_ctc=46.614, loss_att=47.629, acc=0.756, loss=47.324, backward_time=0.313, grad_norm=49.597, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.545e-04, train_time=1.393 +[gpub010:0/16] 2024-02-11 08:32:32,735 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-02-11 08:32:51,974 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 08:32:55,817 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 08:32:55,817 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-02-11 08:32:55,821 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 08:38:19,755 (trainer:762) INFO: 42epoch:train:13701-13800batch: iter_time=3.027, forward_time=0.303, loss_ctc=49.156, loss_att=42.337, acc=0.767, loss=44.383, backward_time=0.296, grad_norm=44.144, clip=100.000, loss_scale=6.231e+33, optim_step_time=0.094, optim0_lr0=1.545e-04, train_time=4.300 +[gpub010:0/16] 2024-02-11 08:40:44,740 (trainer:762) INFO: 42epoch:train:13801-13900batch: iter_time=8.189e-05, forward_time=0.339, loss_ctc=47.315, loss_att=40.464, acc=0.776, loss=42.519, backward_time=0.314, grad_norm=44.844, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.545e-04, train_time=1.449 +[gpub010:0/16] 2024-02-11 08:42:57,938 (trainer:762) INFO: 42epoch:train:13901-14000batch: iter_time=7.927e-05, forward_time=0.295, loss_ctc=45.818, loss_att=52.049, acc=0.767, loss=50.180, backward_time=0.300, grad_norm=42.110, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.545e-04, train_time=1.332 +[gpub010:0/16] 2024-02-11 08:45:03,359 
(trainer:762) INFO: 42epoch:train:14001-14100batch: iter_time=8.363e-05, forward_time=0.313, loss_ctc=41.116, loss_att=44.630, acc=0.775, loss=43.575, backward_time=0.299, grad_norm=40.632, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.544e-04, train_time=1.253 +[gpub010:0/16] 2024-02-11 08:47:20,993 (trainer:762) INFO: 42epoch:train:14101-14200batch: iter_time=8.389e-05, forward_time=0.332, loss_ctc=43.411, loss_att=48.672, acc=0.757, loss=47.094, backward_time=0.319, grad_norm=45.079, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.544e-04, train_time=1.377 +[gpub010:0/16] 2024-02-11 08:49:23,560 (trainer:762) INFO: 42epoch:train:14201-14300batch: iter_time=8.162e-05, forward_time=0.289, loss_ctc=43.827, loss_att=47.959, acc=0.754, loss=46.719, backward_time=0.295, grad_norm=39.243, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.544e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 08:51:54,360 (trainer:762) INFO: 42epoch:train:14301-14400batch: iter_time=7.847e-05, forward_time=0.303, loss_ctc=40.883, loss_att=42.208, acc=0.802, loss=41.810, backward_time=0.302, grad_norm=36.839, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.544e-04, train_time=1.508 +[gpub010:0/16] 2024-02-11 08:53:57,110 (trainer:762) INFO: 42epoch:train:14401-14500batch: iter_time=2.534e-04, forward_time=0.356, loss_ctc=40.400, loss_att=36.813, acc=0.779, loss=37.889, backward_time=0.305, grad_norm=40.210, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.544e-04, train_time=1.227 +[gpub010:0/16] 2024-02-11 08:55:54,049 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-11 08:56:12,229 (trainer:762) INFO: 42epoch:train:14501-14600batch: iter_time=8.017e-05, forward_time=0.292, loss_ctc=48.189, loss_att=45.944, acc=0.769, loss=46.617, backward_time=0.297, grad_norm=43.000, clip=100.000, loss_scale=9.493e+33, optim_step_time=0.093, optim0_lr0=1.544e-04, train_time=1.352 +[gpub010:0/16] 2024-02-11 08:58:20,077 (trainer:762) INFO: 42epoch:train:14601-14700batch: iter_time=8.376e-05, forward_time=0.298, loss_ctc=42.983, loss_att=45.943, acc=0.757, loss=45.055, backward_time=0.304, grad_norm=44.344, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.544e-04, train_time=1.278 +[gpub010:0/16] 2024-02-11 09:00:38,038 (trainer:762) INFO: 42epoch:train:14701-14800batch: iter_time=8.362e-05, forward_time=0.357, loss_ctc=39.602, loss_att=40.371, acc=0.779, loss=40.140, backward_time=0.315, grad_norm=38.721, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.544e-04, train_time=1.380 +[gpub010:0/16] 2024-02-11 09:02:31,826 (trainer:762) INFO: 42epoch:train:14801-14900batch: iter_time=8.223e-05, forward_time=0.291, loss_ctc=41.232, loss_att=42.084, acc=0.773, loss=41.829, backward_time=0.297, grad_norm=43.124, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.543e-04, train_time=1.137 +[gpub010:0/16] 2024-02-11 09:04:50,158 (trainer:762) INFO: 42epoch:train:14901-15000batch: iter_time=8.725e-05, forward_time=0.307, loss_ctc=49.686, loss_att=44.464, acc=0.774, loss=46.031, backward_time=0.300, grad_norm=43.708, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.543e-04, train_time=1.383 +[gpub010:0/16] 2024-02-11 09:44:28,288 (trainer:361) INFO: 42epoch results: [train] iter_time=0.249, forward_time=0.320, loss_ctc=44.479, loss_att=44.367, acc=0.767, loss=44.400, backward_time=0.307, grad_norm=42.822, clip=100.000, loss_scale=5.337e+33, optim_step_time=0.095, optim0_lr0=1.553e-04, train_time=1.535, time=6 hours, 24 minutes and 12.71 
seconds, total_count=660000, gpu_max_cached_mem_GB=42.092, [valid] loss_ctc=35.666, cer_ctc=0.178, loss_att=37.943, acc=0.698, cer=0.292, wer=0.989, loss=37.260, time=39 minutes and 13.53 seconds, total_count=205524, gpu_max_cached_mem_GB=42.092 +[gpub010:0/16] 2024-02-11 09:44:37,474 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-02-11 09:44:37,569 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/37epoch.pth +[gpub010:0/16] 2024-02-11 09:44:37,570 (trainer:290) INFO: 43/45epoch started. Estimated time to finish: 21 hours, 52 minutes and 0.11 seconds +[gpub010:0/16] 2024-02-11 09:44:37,580 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub010:0/16] 2024-02-11 09:44:56,225 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 09:45:00,443 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 09:45:00,443 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-02-11 09:45:00,446 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 09:51:13,658 (trainer:762) INFO: 43epoch:train:1-100batch: iter_time=2.750, forward_time=0.310, loss_ctc=56.909, loss_att=59.341, acc=0.739, loss=58.611, backward_time=0.305, grad_norm=57.169, clip=100.000, 
loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.543e-04, train_time=3.960 +[gpub010:0/16] 2024-02-11 09:53:20,747 (trainer:762) INFO: 43epoch:train:101-200batch: iter_time=1.078e-04, forward_time=0.348, loss_ctc=50.376, loss_att=47.152, acc=0.754, loss=48.119, backward_time=0.311, grad_norm=48.572, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.543e-04, train_time=1.271 +[gpub010:0/16] 2024-02-11 09:55:30,443 (trainer:762) INFO: 43epoch:train:201-300batch: iter_time=2.329e-04, forward_time=0.304, loss_ctc=45.980, loss_att=43.168, acc=0.764, loss=44.012, backward_time=0.309, grad_norm=42.119, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.543e-04, train_time=1.296 +[gpub010:0/16] 2024-02-11 09:58:49,479 (trainer:762) INFO: 43epoch:train:301-400batch: iter_time=0.078, forward_time=0.304, loss_ctc=57.625, loss_att=49.287, acc=0.749, loss=51.789, backward_time=0.308, grad_norm=53.419, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.543e-04, train_time=1.991 +[gpub010:0/16] 2024-02-11 10:00:57,617 (trainer:762) INFO: 43epoch:train:401-500batch: iter_time=9.189e-05, forward_time=0.315, loss_ctc=54.170, loss_att=47.018, acc=0.761, loss=49.164, backward_time=0.326, grad_norm=46.893, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.543e-04, train_time=1.281 +[gpub010:0/16] 2024-02-11 10:03:09,341 (trainer:762) INFO: 43epoch:train:501-600batch: iter_time=2.010e-04, forward_time=0.306, loss_ctc=50.374, loss_att=50.500, acc=0.769, loss=50.462, backward_time=0.305, grad_norm=51.148, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.543e-04, train_time=1.316 +[gpub010:0/16] 2024-02-11 10:05:16,876 (trainer:762) INFO: 43epoch:train:601-700batch: iter_time=8.759e-05, forward_time=0.328, loss_ctc=42.486, loss_att=38.187, acc=0.792, loss=39.477, backward_time=0.329, grad_norm=40.331, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, 
optim0_lr0=1.542e-04, train_time=1.276 +[gpub010:0/16] 2024-02-11 10:07:25,710 (trainer:762) INFO: 43epoch:train:701-800batch: iter_time=8.193e-05, forward_time=0.303, loss_ctc=41.074, loss_att=39.698, acc=0.766, loss=40.111, backward_time=0.308, grad_norm=43.228, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.542e-04, train_time=1.288 +[gpub010:0/16] 2024-02-11 10:09:49,977 (trainer:762) INFO: 43epoch:train:801-900batch: iter_time=3.606e-04, forward_time=0.339, loss_ctc=57.079, loss_att=51.466, acc=0.774, loss=53.150, backward_time=0.321, grad_norm=45.499, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.542e-04, train_time=1.442 +[gpub010:0/16] 2024-02-11 10:11:52,765 (trainer:762) INFO: 43epoch:train:901-1000batch: iter_time=8.068e-05, forward_time=0.302, loss_ctc=43.300, loss_att=42.403, acc=0.767, loss=42.672, backward_time=0.312, grad_norm=42.359, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.542e-04, train_time=1.228 +[gpub010:0/16] 2024-02-11 10:14:04,418 (trainer:762) INFO: 43epoch:train:1001-1100batch: iter_time=7.525e-05, forward_time=0.325, loss_ctc=48.423, loss_att=50.032, acc=0.760, loss=49.549, backward_time=0.301, grad_norm=45.787, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.542e-04, train_time=1.316 +[gpub010:0/16] 2024-02-11 10:16:17,873 (trainer:762) INFO: 43epoch:train:1101-1200batch: iter_time=7.998e-05, forward_time=0.311, loss_ctc=42.509, loss_att=38.152, acc=0.774, loss=39.459, backward_time=0.322, grad_norm=43.998, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.542e-04, train_time=1.334 +[gpub010:0/16] 2024-02-11 10:17:41,422 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub010:0/16] 2024-02-11 10:18:01,134 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 10:18:04,636 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 10:18:04,636 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-02-11 10:18:04,684 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 10:23:25,934 (trainer:762) INFO: 43epoch:train:1201-1300batch: iter_time=3.018, forward_time=0.360, loss_ctc=46.543, loss_att=49.842, acc=0.742, loss=48.853, backward_time=0.310, grad_norm=48.490, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.542e-04, train_time=4.279 +[gpub010:0/16] 2024-02-11 10:25:35,154 (trainer:762) INFO: 43epoch:train:1301-1400batch: iter_time=8.851e-05, forward_time=0.312, loss_ctc=53.325, loss_att=55.271, acc=0.731, loss=54.687, backward_time=0.316, grad_norm=56.449, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.542e-04, train_time=1.293 +[gpub010:0/16] 2024-02-11 10:27:44,652 (trainer:762) INFO: 43epoch:train:1401-1500batch: iter_time=8.081e-05, forward_time=0.295, loss_ctc=50.103, loss_att=46.074, acc=0.761, loss=47.282, backward_time=0.310, grad_norm=45.891, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.541e-04, train_time=1.295 +[gpub010:0/16] 2024-02-11 10:29:49,749 (trainer:762) INFO: 
43epoch:train:1501-1600batch: iter_time=9.256e-05, forward_time=0.346, loss_ctc=44.242, loss_att=38.129, acc=0.757, loss=39.963, backward_time=0.300, grad_norm=41.403, clip=100.000, loss_scale=6.075e+33, optim_step_time=0.094, optim0_lr0=1.541e-04, train_time=1.251 +[gpub010:0/16] 2024-02-11 10:32:08,659 (trainer:762) INFO: 43epoch:train:1601-1700batch: iter_time=8.725e-05, forward_time=0.301, loss_ctc=62.761, loss_att=51.546, acc=0.731, loss=54.911, backward_time=0.314, grad_norm=54.125, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.541e-04, train_time=1.389 +[gpub010:0/16] 2024-02-11 10:34:02,673 (trainer:762) INFO: 43epoch:train:1701-1800batch: iter_time=9.123e-05, forward_time=0.292, loss_ctc=49.336, loss_att=51.266, acc=0.762, loss=50.687, backward_time=0.298, grad_norm=47.969, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.541e-04, train_time=1.140 +[gpub010:0/16] 2024-02-11 10:36:15,780 (trainer:762) INFO: 43epoch:train:1801-1900batch: iter_time=8.360e-05, forward_time=0.384, loss_ctc=42.617, loss_att=40.996, acc=0.776, loss=41.482, backward_time=0.305, grad_norm=41.121, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.541e-04, train_time=1.331 +[gpub010:0/16] 2024-02-11 10:38:25,021 (trainer:762) INFO: 43epoch:train:1901-2000batch: iter_time=7.772e-05, forward_time=0.304, loss_ctc=42.341, loss_att=37.944, acc=0.776, loss=39.263, backward_time=0.302, grad_norm=42.985, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.541e-04, train_time=1.292 +[gpub010:0/16] 2024-02-11 10:40:36,223 (trainer:762) INFO: 43epoch:train:2001-2100batch: iter_time=8.095e-05, forward_time=0.300, loss_ctc=46.492, loss_att=41.478, acc=0.773, loss=42.982, backward_time=0.309, grad_norm=42.132, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.541e-04, train_time=1.312 +[gpub010:0/16] 2024-02-11 10:42:42,610 (trainer:762) INFO: 43epoch:train:2101-2200batch: 
iter_time=9.058e-05, forward_time=0.352, loss_ctc=52.011, loss_att=50.441, acc=0.755, loss=50.912, backward_time=0.319, grad_norm=47.976, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.541e-04, train_time=1.264 +[gpub010:0/16] 2024-02-11 10:44:50,945 (trainer:762) INFO: 43epoch:train:2201-2300batch: iter_time=8.971e-05, forward_time=0.301, loss_ctc=42.058, loss_att=43.750, acc=0.768, loss=43.243, backward_time=0.305, grad_norm=42.870, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.540e-04, train_time=1.283 +[gpub010:0/16] 2024-02-11 10:46:55,246 (trainer:762) INFO: 43epoch:train:2301-2400batch: iter_time=8.806e-05, forward_time=0.301, loss_ctc=45.561, loss_att=43.685, acc=0.758, loss=44.247, backward_time=0.316, grad_norm=45.809, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.540e-04, train_time=1.242 +[gpub010:0/16] 2024-02-11 10:49:06,935 (trainer:762) INFO: 43epoch:train:2401-2500batch: iter_time=8.497e-05, forward_time=0.340, loss_ctc=41.626, loss_att=37.077, acc=0.778, loss=38.441, backward_time=0.308, grad_norm=38.840, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.540e-04, train_time=1.317 +[gpub010:0/16] 2024-02-11 10:49:26,962 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub010:0/16] 2024-02-11 10:49:46,395 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 10:49:49,940 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 10:49:49,940 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-02-11 10:49:50,034 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 10:58:15,443 (trainer:762) INFO: 43epoch:train:2501-2600batch: iter_time=3.984, forward_time=0.356, loss_ctc=54.897, loss_att=59.625, acc=0.724, loss=58.207, backward_time=0.309, grad_norm=65.082, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.540e-04, train_time=5.485 +[gpub010:0/16] 2024-02-11 11:00:04,290 (trainer:762) INFO: 43epoch:train:2601-2700batch: iter_time=7.661e-05, forward_time=0.290, loss_ctc=48.301, loss_att=45.696, acc=0.753, loss=46.478, backward_time=0.295, grad_norm=44.750, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.540e-04, train_time=1.088 +[gpub010:0/16] 2024-02-11 11:02:28,905 (trainer:762) INFO: 43epoch:train:2701-2800batch: iter_time=8.802e-05, forward_time=0.408, loss_ctc=45.176, loss_att=41.393, acc=0.768, loss=42.528, backward_time=0.334, grad_norm=40.319, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.540e-04, train_time=1.446 +[gpub010:0/16] 2024-02-11 11:04:41,154 (trainer:762) 
INFO: 43epoch:train:2801-2900batch: iter_time=7.479e-05, forward_time=0.293, loss_ctc=56.105, loss_att=47.869, acc=0.744, loss=50.340, backward_time=0.297, grad_norm=50.685, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.540e-04, train_time=1.322 +[gpub010:0/16] 2024-02-11 11:05:04,721 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 11:06:43,685 (trainer:762) INFO: 43epoch:train:2901-3000batch: iter_time=7.793e-05, forward_time=0.290, loss_ctc=49.994, loss_att=44.527, acc=0.760, loss=46.167, backward_time=0.296, grad_norm=43.151, clip=100.000, loss_scale=6.294e+33, optim_step_time=0.093, optim0_lr0=1.540e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 11:09:00,883 (trainer:762) INFO: 43epoch:train:3001-3100batch: iter_time=4.025e-04, forward_time=0.405, loss_ctc=48.152, loss_att=48.244, acc=0.761, loss=48.216, backward_time=0.314, grad_norm=49.441, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.540e-04, train_time=1.372 +[gpub010:0/16] 2024-02-11 11:13:04,241 (trainer:762) INFO: 43epoch:train:3101-3200batch: iter_time=7.974e-05, forward_time=0.290, loss_ctc=41.407, loss_att=37.957, acc=0.786, loss=38.992, backward_time=0.294, grad_norm=41.279, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.539e-04, train_time=2.433 +[gpub010:0/16] 2024-02-11 11:15:00,844 (trainer:762) INFO: 43epoch:train:3201-3300batch: iter_time=7.574e-05, forward_time=0.290, loss_ctc=40.609, loss_att=39.614, acc=0.764, loss=39.913, backward_time=0.294, grad_norm=41.708, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.539e-04, train_time=1.166 +[gpub010:0/16] 2024-02-11 11:17:06,423 (trainer:762) INFO: 43epoch:train:3301-3400batch: iter_time=7.830e-05, forward_time=0.376, loss_ctc=56.399, loss_att=50.099, acc=0.770, loss=51.989, backward_time=0.361, grad_norm=44.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.539e-04, 
train_time=1.255 +[gpub010:0/16] 2024-02-11 11:19:16,902 (trainer:762) INFO: 43epoch:train:3401-3500batch: iter_time=8.792e-05, forward_time=0.290, loss_ctc=41.809, loss_att=40.984, acc=0.771, loss=41.231, backward_time=0.294, grad_norm=41.336, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.539e-04, train_time=1.305 +[gpub010:0/16] 2024-02-11 11:21:25,094 (trainer:762) INFO: 43epoch:train:3501-3600batch: iter_time=8.018e-05, forward_time=0.292, loss_ctc=46.994, loss_att=47.456, acc=0.765, loss=47.317, backward_time=0.298, grad_norm=43.290, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.539e-04, train_time=1.282 +[gpub010:0/16] 2024-02-11 11:23:38,215 (trainer:762) INFO: 43epoch:train:3601-3700batch: iter_time=9.136e-05, forward_time=0.398, loss_ctc=40.888, loss_att=36.875, acc=0.773, loss=38.079, backward_time=0.316, grad_norm=40.514, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.539e-04, train_time=1.331 +[gpub010:0/16] 2024-02-11 11:25:01,357 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-02-11 11:25:59,743 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 11:26:10,097 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 11:26:10,097 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-02-11 11:26:10,884 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 11:32:03,712 (trainer:762) INFO: 43epoch:train:3701-3800batch: iter_time=3.756, forward_time=0.293, loss_ctc=45.570, loss_att=48.032, acc=0.745, loss=47.294, backward_time=0.296, grad_norm=43.470, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.539e-04, train_time=5.055 +[gpub010:0/16] 2024-02-11 11:34:12,286 (trainer:762) INFO: 43epoch:train:3801-3900batch: iter_time=7.963e-05, forward_time=0.412, loss_ctc=51.765, loss_att=53.265, acc=0.737, loss=52.815, backward_time=0.313, grad_norm=52.158, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.539e-04, train_time=1.285 +[gpub010:0/16] 2024-02-11 11:36:08,092 (trainer:762) INFO: 43epoch:train:3901-4000batch: iter_time=8.051e-05, forward_time=0.293, loss_ctc=49.178, loss_att=44.282, acc=0.768, loss=45.751, backward_time=0.298, grad_norm=46.045, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.538e-04, train_time=1.158 +[gpub010:0/16] 2024-02-11 11:38:25,290 (trainer:762) INFO: 
43epoch:train:4001-4100batch: iter_time=8.277e-05, forward_time=0.291, loss_ctc=43.756, loss_att=37.279, acc=0.762, loss=39.222, backward_time=0.295, grad_norm=42.347, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.538e-04, train_time=1.372 +[gpub010:0/16] 2024-02-11 11:40:35,740 (trainer:762) INFO: 43epoch:train:4101-4200batch: iter_time=8.067e-05, forward_time=0.403, loss_ctc=60.736, loss_att=50.052, acc=0.738, loss=53.257, backward_time=0.314, grad_norm=52.502, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.538e-04, train_time=1.304 +[gpub010:0/16] 2024-02-11 11:42:35,544 (trainer:762) INFO: 43epoch:train:4201-4300batch: iter_time=8.026e-05, forward_time=0.293, loss_ctc=48.217, loss_att=50.266, acc=0.766, loss=49.651, backward_time=0.297, grad_norm=49.704, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.538e-04, train_time=1.198 +[gpub010:0/16] 2024-02-11 11:44:48,995 (trainer:762) INFO: 43epoch:train:4301-4400batch: iter_time=8.009e-05, forward_time=0.291, loss_ctc=42.469, loss_att=40.789, acc=0.779, loss=41.293, backward_time=0.296, grad_norm=40.742, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.538e-04, train_time=1.334 +[gpub010:0/16] 2024-02-11 11:46:56,363 (trainer:762) INFO: 43epoch:train:4401-4500batch: iter_time=7.767e-05, forward_time=0.377, loss_ctc=41.397, loss_att=36.928, acc=0.782, loss=38.268, backward_time=0.323, grad_norm=43.811, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.538e-04, train_time=1.273 +[gpub010:0/16] 2024-02-11 11:49:04,312 (trainer:762) INFO: 43epoch:train:4501-4600batch: iter_time=8.170e-05, forward_time=0.292, loss_ctc=46.696, loss_att=40.998, acc=0.777, loss=42.707, backward_time=0.296, grad_norm=43.004, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.538e-04, train_time=1.279 +[gpub010:0/16] 2024-02-11 11:51:09,796 (trainer:762) INFO: 43epoch:train:4601-4700batch: 
iter_time=8.195e-05, forward_time=0.293, loss_ctc=50.954, loss_att=49.553, acc=0.757, loss=49.973, backward_time=0.299, grad_norm=44.894, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.538e-04, train_time=1.255 +[gpub010:0/16] 2024-02-11 11:53:15,683 (trainer:762) INFO: 43epoch:train:4701-4800batch: iter_time=8.407e-05, forward_time=0.376, loss_ctc=41.592, loss_att=43.409, acc=0.771, loss=42.864, backward_time=0.322, grad_norm=41.719, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.104, optim0_lr0=1.537e-04, train_time=1.254 +[gpub010:0/16] 2024-02-11 11:55:24,170 (trainer:762) INFO: 43epoch:train:4801-4900batch: iter_time=8.375e-05, forward_time=0.296, loss_ctc=45.084, loss_att=42.353, acc=0.763, loss=43.172, backward_time=0.295, grad_norm=45.219, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.537e-04, train_time=1.289 +[gpub010:0/16] 2024-02-11 11:57:25,773 (trainer:762) INFO: 43epoch:train:4901-5000batch: iter_time=8.380e-05, forward_time=0.291, loss_ctc=41.433, loss_att=36.209, acc=0.783, loss=37.776, backward_time=0.296, grad_norm=37.976, clip=100.000, loss_scale=9.242e+33, optim_step_time=0.093, optim0_lr0=1.537e-04, train_time=1.216 +[gpub010:0/16] 2024-02-11 11:57:45,896 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-02-11 11:58:04,996 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 11:58:08,525 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 11:58:08,525 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-02-11 11:58:08,530 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 12:03:55,687 (trainer:762) INFO: 43epoch:train:5001-5100batch: iter_time=2.710, forward_time=0.349, loss_ctc=54.394, loss_att=60.884, acc=0.737, loss=58.937, backward_time=0.313, grad_norm=54.518, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.537e-04, train_time=3.899 +[gpub010:0/16] 2024-02-11 12:06:03,818 (trainer:762) INFO: 43epoch:train:5101-5200batch: iter_time=8.386e-05, forward_time=0.290, loss_ctc=47.508, loss_att=46.010, acc=0.762, loss=46.459, backward_time=0.296, grad_norm=43.999, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.537e-04, train_time=1.281 +[gpub010:0/16] 2024-02-11 12:08:05,485 (trainer:762) INFO: 43epoch:train:5201-5300batch: iter_time=8.296e-05, forward_time=0.339, loss_ctc=44.853, loss_att=42.699, acc=0.770, loss=43.345, backward_time=0.351, grad_norm=39.345, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.537e-04, train_time=1.216 +[gpub010:0/16] 2024-02-11 12:10:18,290 (trainer:762) INFO: 
43epoch:train:5301-5400batch: iter_time=8.560e-05, forward_time=0.294, loss_ctc=56.471, loss_att=48.701, acc=0.756, loss=51.032, backward_time=0.298, grad_norm=50.992, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.537e-04, train_time=1.327 +[gpub010:0/16] 2024-02-11 12:12:22,414 (trainer:762) INFO: 43epoch:train:5401-5500batch: iter_time=8.518e-05, forward_time=0.291, loss_ctc=49.615, loss_att=45.855, acc=0.768, loss=46.983, backward_time=0.297, grad_norm=46.842, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.537e-04, train_time=1.241 +[gpub010:0/16] 2024-02-11 12:14:44,986 (trainer:762) INFO: 43epoch:train:5501-5600batch: iter_time=8.595e-05, forward_time=0.412, loss_ctc=47.548, loss_att=49.820, acc=0.775, loss=49.138, backward_time=0.316, grad_norm=48.200, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.536e-04, train_time=1.426 +[gpub010:0/16] 2024-02-11 12:16:40,409 (trainer:762) INFO: 43epoch:train:5601-5700batch: iter_time=8.089e-05, forward_time=0.294, loss_ctc=41.246, loss_att=37.495, acc=0.798, loss=38.621, backward_time=0.297, grad_norm=38.507, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.536e-04, train_time=1.154 +[gpub010:0/16] 2024-02-11 12:18:46,910 (trainer:762) INFO: 43epoch:train:5701-5800batch: iter_time=8.125e-05, forward_time=0.291, loss_ctc=40.218, loss_att=39.268, acc=0.772, loss=39.553, backward_time=0.295, grad_norm=41.749, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.536e-04, train_time=1.265 +[gpub010:0/16] 2024-02-11 12:20:52,986 (trainer:762) INFO: 43epoch:train:5801-5900batch: iter_time=8.528e-05, forward_time=0.403, loss_ctc=55.366, loss_att=51.238, acc=0.777, loss=52.476, backward_time=0.316, grad_norm=46.831, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.536e-04, train_time=1.260 +[gpub010:0/16] 2024-02-11 12:22:53,081 (trainer:762) INFO: 43epoch:train:5901-6000batch: 
iter_time=8.017e-05, forward_time=0.294, loss_ctc=41.733, loss_att=41.860, acc=0.771, loss=41.822, backward_time=0.298, grad_norm=40.910, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.536e-04, train_time=1.201 +[gpub010:0/16] 2024-02-11 12:25:08,246 (trainer:762) INFO: 43epoch:train:6001-6100batch: iter_time=8.122e-05, forward_time=0.293, loss_ctc=46.584, loss_att=49.202, acc=0.766, loss=48.417, backward_time=0.298, grad_norm=47.305, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.536e-04, train_time=1.352 +[gpub010:0/16] 2024-02-11 12:26:48,777 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 12:27:14,630 (trainer:762) INFO: 43epoch:train:6101-6200batch: iter_time=6.166e-04, forward_time=0.373, loss_ctc=41.011, loss_att=37.524, acc=0.779, loss=38.570, backward_time=0.314, grad_norm=41.405, clip=100.000, loss_scale=9.126e+33, optim_step_time=0.108, optim0_lr0=1.536e-04, train_time=1.264 +[gpub010:0/16] 2024-02-11 12:28:35,252 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub010:0/16] 2024-02-11 12:28:54,252 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 12:28:58,112 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 12:28:58,112 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-02-11 12:28:58,116 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 12:34:20,094 (trainer:762) INFO: 43epoch:train:6201-6300batch: iter_time=2.985, forward_time=0.291, loss_ctc=45.484, loss_att=47.959, acc=0.755, loss=47.216, backward_time=0.297, grad_norm=44.998, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.536e-04, train_time=4.254 +[gpub010:0/16] 2024-02-11 12:36:41,870 (trainer:762) INFO: 43epoch:train:6301-6400batch: iter_time=7.763e-05, forward_time=0.386, loss_ctc=51.560, loss_att=53.611, acc=0.751, loss=52.996, backward_time=0.321, grad_norm=48.988, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.536e-04, train_time=1.417 +[gpub010:0/16] 2024-02-11 12:38:51,252 (trainer:762) INFO: 43epoch:train:6401-6500batch: iter_time=8.027e-05, forward_time=0.292, loss_ctc=49.237, loss_att=44.477, acc=0.777, loss=45.905, backward_time=0.298, grad_norm=45.151, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.535e-04, train_time=1.294 +[gpub010:0/16] 2024-02-11 12:40:55,836 (trainer:762) INFO: 
43epoch:train:6501-6600batch: iter_time=8.443e-05, forward_time=0.291, loss_ctc=43.694, loss_att=37.904, acc=0.767, loss=39.641, backward_time=0.297, grad_norm=41.267, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.535e-04, train_time=1.245 +[gpub010:0/16] 2024-02-11 12:43:09,731 (trainer:762) INFO: 43epoch:train:6601-6700batch: iter_time=9.294e-05, forward_time=0.378, loss_ctc=60.931, loss_att=52.669, acc=0.745, loss=55.148, backward_time=0.318, grad_norm=55.935, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.535e-04, train_time=1.339 +[gpub010:0/16] 2024-02-11 12:45:28,822 (trainer:762) INFO: 43epoch:train:6701-6800batch: iter_time=8.945e-05, forward_time=0.293, loss_ctc=47.924, loss_att=50.907, acc=0.771, loss=50.012, backward_time=0.299, grad_norm=46.766, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.535e-04, train_time=1.391 +[gpub010:0/16] 2024-02-11 12:47:30,972 (trainer:762) INFO: 43epoch:train:6801-6900batch: iter_time=7.684e-05, forward_time=0.291, loss_ctc=41.580, loss_att=40.574, acc=0.796, loss=40.876, backward_time=0.297, grad_norm=37.252, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.535e-04, train_time=1.221 +[gpub010:0/16] 2024-02-11 12:49:50,963 (trainer:762) INFO: 43epoch:train:6901-7000batch: iter_time=7.985e-05, forward_time=0.367, loss_ctc=41.543, loss_att=37.129, acc=0.791, loss=38.453, backward_time=0.336, grad_norm=42.900, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.535e-04, train_time=1.400 +[gpub010:0/16] 2024-02-11 12:51:57,247 (trainer:762) INFO: 43epoch:train:7001-7100batch: iter_time=8.043e-05, forward_time=0.291, loss_ctc=45.514, loss_att=41.652, acc=0.780, loss=42.810, backward_time=0.297, grad_norm=41.771, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.535e-04, train_time=1.263 +[gpub010:0/16] 2024-02-11 12:54:32,630 (trainer:762) INFO: 43epoch:train:7101-7200batch: 
iter_time=9.482e-05, forward_time=0.363, loss_ctc=49.847, loss_att=49.208, acc=0.767, loss=49.400, backward_time=0.359, grad_norm=44.118, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.535e-04, train_time=1.553 +[gpub010:0/16] 2024-02-11 12:56:50,410 (trainer:762) INFO: 43epoch:train:7201-7300batch: iter_time=8.408e-05, forward_time=0.293, loss_ctc=41.010, loss_att=44.006, acc=0.774, loss=43.107, backward_time=0.298, grad_norm=42.942, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.534e-04, train_time=1.378 +[gpub010:0/16] 2024-02-11 12:58:54,311 (trainer:762) INFO: 43epoch:train:7301-7400batch: iter_time=3.015e-04, forward_time=0.369, loss_ctc=45.167, loss_att=43.443, acc=0.764, loss=43.960, backward_time=0.333, grad_norm=44.195, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.534e-04, train_time=1.238 +[gpub010:0/16] 2024-02-11 13:01:18,495 (trainer:762) INFO: 43epoch:train:7401-7500batch: iter_time=8.887e-05, forward_time=0.291, loss_ctc=41.190, loss_att=37.652, acc=0.786, loss=38.713, backward_time=0.295, grad_norm=38.399, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.534e-04, train_time=1.443 +[gpub010:0/16] 2024-02-11 13:01:38,522 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub010:0/16] 2024-02-11 13:01:58,156 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 13:02:01,952 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 13:02:01,952 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-02-11 13:02:01,955 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 13:08:41,680 (trainer:762) INFO: 43epoch:train:7501-7600batch: iter_time=3.084, forward_time=0.291, loss_ctc=54.097, loss_att=61.622, acc=0.721, loss=59.364, backward_time=0.298, grad_norm=53.928, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.534e-04, train_time=4.431 +[gpub010:0/16] 2024-02-11 13:11:05,158 (trainer:762) INFO: 43epoch:train:7601-7700batch: iter_time=8.142e-05, forward_time=0.359, loss_ctc=47.382, loss_att=45.720, acc=0.753, loss=46.218, backward_time=0.358, grad_norm=47.839, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.534e-04, train_time=1.434 +[gpub010:0/16] 2024-02-11 13:13:18,515 (trainer:762) INFO: 43epoch:train:7701-7800batch: iter_time=8.174e-05, forward_time=0.290, loss_ctc=44.882, loss_att=41.567, acc=0.770, loss=42.562, backward_time=0.294, grad_norm=39.603, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.534e-04, train_time=1.335 +[gpub010:0/16] 2024-02-11 13:15:36,159 (trainer:762) INFO: 
43epoch:train:7801-7900batch: iter_time=8.273e-05, forward_time=0.329, loss_ctc=55.118, loss_att=47.601, acc=0.748, loss=49.856, backward_time=0.384, grad_norm=49.993, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.534e-04, train_time=1.376 +[gpub010:0/16] 2024-02-11 13:17:51,302 (trainer:762) INFO: 43epoch:train:7901-8000batch: iter_time=8.742e-05, forward_time=0.291, loss_ctc=49.470, loss_att=44.028, acc=0.764, loss=45.661, backward_time=0.296, grad_norm=43.705, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.534e-04, train_time=1.351 +[gpub010:0/16] 2024-02-11 13:19:54,438 (trainer:762) INFO: 43epoch:train:8001-8100batch: iter_time=8.489e-05, forward_time=0.292, loss_ctc=47.556, loss_att=48.542, acc=0.762, loss=48.246, backward_time=0.297, grad_norm=49.126, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.533e-04, train_time=1.232 +[gpub010:0/16] 2024-02-11 13:22:02,270 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-11 13:22:07,854 (trainer:762) INFO: 43epoch:train:8101-8200batch: iter_time=8.971e-05, forward_time=0.413, loss_ctc=41.156, loss_att=37.970, acc=0.787, loss=38.926, backward_time=0.323, grad_norm=40.059, clip=100.000, loss_scale=6.136e+33, optim_step_time=0.095, optim0_lr0=1.533e-04, train_time=1.333 +[gpub010:0/16] 2024-02-11 13:24:19,263 (trainer:762) INFO: 43epoch:train:8201-8300batch: iter_time=8.300e-05, forward_time=0.289, loss_ctc=39.966, loss_att=38.775, acc=0.770, loss=39.132, backward_time=0.293, grad_norm=40.780, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.533e-04, train_time=1.315 +[gpub010:0/16] 2024-02-11 13:26:23,972 (trainer:762) INFO: 43epoch:train:8301-8400batch: iter_time=8.078e-05, forward_time=0.295, loss_ctc=55.063, loss_att=49.548, acc=0.772, loss=51.202, backward_time=0.301, grad_norm=44.664, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.533e-04, train_time=1.246 +[gpub010:0/16] 2024-02-11 13:28:37,460 (trainer:762) INFO: 43epoch:train:8401-8500batch: iter_time=8.068e-05, forward_time=0.352, loss_ctc=41.015, loss_att=41.487, acc=0.769, loss=41.345, backward_time=0.308, grad_norm=43.104, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.533e-04, train_time=1.336 +[gpub010:0/16] 2024-02-11 13:30:38,849 (trainer:762) INFO: 43epoch:train:8501-8600batch: iter_time=8.645e-05, forward_time=0.295, loss_ctc=46.737, loss_att=47.536, acc=0.764, loss=47.297, backward_time=0.299, grad_norm=44.178, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.533e-04, train_time=1.213 +[gpub010:0/16] 2024-02-11 13:32:40,310 (trainer:762) INFO: 43epoch:train:8601-8700batch: iter_time=7.861e-05, forward_time=0.310, loss_ctc=39.954, loss_att=35.966, acc=0.777, loss=37.162, backward_time=0.294, grad_norm=42.263, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.533e-04, train_time=1.215 +[gpub010:0/16] 2024-02-11 13:34:14,869 
(multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub010:0/16] 2024-02-11 13:34:34,249 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 13:34:37,816 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 13:34:37,816 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-02-11 13:34:37,846 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 13:40:35,499 (trainer:762) INFO: 43epoch:train:8701-8800batch: iter_time=3.363, forward_time=0.367, loss_ctc=44.844, loss_att=47.172, acc=0.749, loss=46.474, backward_time=0.306, grad_norm=42.462, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.533e-04, train_time=4.751 +[gpub010:0/16] 2024-02-11 13:43:05,284 (trainer:762) INFO: 43epoch:train:8801-8900batch: iter_time=5.359e-04, forward_time=0.364, loss_ctc=50.490, loss_att=53.078, acc=0.739, loss=52.301, backward_time=0.314, grad_norm=50.053, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.117, optim0_lr0=1.533e-04, train_time=1.498 +[gpub010:0/16] 2024-02-11 13:45:47,310 (trainer:762) INFO: 43epoch:train:8901-9000batch: iter_time=8.018e-05, forward_time=0.353, loss_ctc=48.959, loss_att=44.257, acc=0.767, loss=45.668, backward_time=0.322, grad_norm=45.713, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.532e-04, 
train_time=1.620 +[gpub010:0/16] 2024-02-11 13:47:47,071 (trainer:762) INFO: 43epoch:train:9001-9100batch: iter_time=7.977e-05, forward_time=0.351, loss_ctc=43.533, loss_att=37.491, acc=0.765, loss=39.303, backward_time=0.310, grad_norm=42.035, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.532e-04, train_time=1.196 +[gpub010:0/16] 2024-02-11 13:50:17,499 (trainer:762) INFO: 43epoch:train:9101-9200batch: iter_time=8.371e-05, forward_time=0.330, loss_ctc=59.268, loss_att=49.419, acc=0.741, loss=52.373, backward_time=0.309, grad_norm=51.879, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.532e-04, train_time=1.504 +[gpub010:0/16] 2024-02-11 13:52:31,817 (trainer:762) INFO: 43epoch:train:9201-9300batch: iter_time=2.250e-04, forward_time=0.329, loss_ctc=47.759, loss_att=50.374, acc=0.768, loss=49.589, backward_time=0.338, grad_norm=49.933, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.532e-04, train_time=1.343 +[gpub010:0/16] 2024-02-11 13:54:40,095 (trainer:762) INFO: 43epoch:train:9301-9400batch: iter_time=3.844e-04, forward_time=0.363, loss_ctc=41.803, loss_att=40.573, acc=0.779, loss=40.942, backward_time=0.326, grad_norm=40.301, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.532e-04, train_time=1.283 +[gpub010:0/16] 2024-02-11 13:56:55,757 (trainer:762) INFO: 43epoch:train:9401-9500batch: iter_time=7.291e-04, forward_time=0.349, loss_ctc=41.059, loss_att=36.875, acc=0.783, loss=38.130, backward_time=0.310, grad_norm=41.575, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.532e-04, train_time=1.357 +[gpub010:0/16] 2024-02-11 13:59:17,691 (trainer:762) INFO: 43epoch:train:9501-9600batch: iter_time=5.063e-04, forward_time=0.414, loss_ctc=45.795, loss_att=40.710, acc=0.779, loss=42.236, backward_time=0.324, grad_norm=41.942, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.532e-04, train_time=1.418 +[gpub010:0/16] 2024-02-11 
14:01:46,148 (trainer:762) INFO: 43epoch:train:9601-9700batch: iter_time=3.546e-04, forward_time=0.385, loss_ctc=50.329, loss_att=49.478, acc=0.758, loss=49.734, backward_time=0.358, grad_norm=46.418, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.532e-04, train_time=1.485 +[gpub010:0/16] 2024-02-11 14:04:11,459 (trainer:762) INFO: 43epoch:train:9701-9800batch: iter_time=9.004e-05, forward_time=0.378, loss_ctc=40.950, loss_att=42.857, acc=0.773, loss=42.285, backward_time=0.327, grad_norm=41.904, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.531e-04, train_time=1.453 +[gpub010:0/16] 2024-02-11 14:06:28,336 (trainer:762) INFO: 43epoch:train:9801-9900batch: iter_time=3.197e-04, forward_time=0.348, loss_ctc=44.302, loss_att=42.338, acc=0.764, loss=42.927, backward_time=0.350, grad_norm=45.842, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.531e-04, train_time=1.369 +[gpub010:0/16] 2024-02-11 14:08:42,983 (trainer:762) INFO: 43epoch:train:9901-10000batch: iter_time=2.464e-04, forward_time=0.393, loss_ctc=40.749, loss_att=36.139, acc=0.783, loss=37.522, backward_time=0.319, grad_norm=40.458, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.116, optim0_lr0=1.531e-04, train_time=1.347 +[gpub010:0/16] 2024-02-11 14:09:03,036 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub010:0/16] 2024-02-11 14:09:22,743 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 14:09:26,260 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 14:09:26,260 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-02-11 14:09:26,410 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 14:16:19,719 (trainer:762) INFO: 43epoch:train:10001-10100batch: iter_time=3.177, forward_time=0.296, loss_ctc=52.810, loss_att=58.503, acc=0.728, loss=56.795, backward_time=0.298, grad_norm=57.860, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.531e-04, train_time=4.567 +[gpub010:0/16] 2024-02-11 14:18:48,269 (trainer:762) INFO: 43epoch:train:10101-10200batch: iter_time=7.840e-05, forward_time=0.353, loss_ctc=47.229, loss_att=44.550, acc=0.757, loss=45.354, backward_time=0.330, grad_norm=46.732, clip=100.000, loss_scale=5.452e+33, optim_step_time=0.098, optim0_lr0=1.531e-04, train_time=1.485 +[gpub010:0/16] 2024-02-11 14:20:44,569 (trainer:762) INFO: 43epoch:train:10201-10300batch: iter_time=8.095e-05, forward_time=0.292, loss_ctc=44.798, loss_att=41.051, acc=0.772, loss=42.175, backward_time=0.298, grad_norm=39.806, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.531e-04, train_time=1.162 +[gpub010:0/16] 2024-02-11 14:23:20,905 (trainer:762) 
INFO: 43epoch:train:10301-10400batch: iter_time=2.228e-04, forward_time=0.354, loss_ctc=54.897, loss_att=47.210, acc=0.749, loss=49.516, backward_time=0.310, grad_norm=51.494, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.113, optim0_lr0=1.531e-04, train_time=1.564 +[gpub010:0/16] 2024-02-11 14:25:23,298 (trainer:762) INFO: 43epoch:train:10401-10500batch: iter_time=8.126e-05, forward_time=0.354, loss_ctc=49.056, loss_att=44.022, acc=0.764, loss=45.532, backward_time=0.304, grad_norm=42.870, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.531e-04, train_time=1.224 +[gpub010:0/16] 2024-02-11 14:27:35,109 (trainer:762) INFO: 43epoch:train:10501-10600batch: iter_time=1.953e-04, forward_time=0.301, loss_ctc=47.095, loss_att=47.403, acc=0.766, loss=47.311, backward_time=0.297, grad_norm=46.773, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.530e-04, train_time=1.318 +[gpub010:0/16] 2024-02-11 14:30:01,265 (trainer:762) INFO: 43epoch:train:10601-10700batch: iter_time=8.199e-05, forward_time=0.415, loss_ctc=40.908, loss_att=37.498, acc=0.790, loss=38.521, backward_time=0.316, grad_norm=39.851, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.108, optim0_lr0=1.530e-04, train_time=1.461 +[gpub010:0/16] 2024-02-11 14:30:21,698 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-11 14:32:03,834 (trainer:762) INFO: 43epoch:train:10701-10800batch: iter_time=8.536e-05, forward_time=0.289, loss_ctc=40.184, loss_att=39.146, acc=0.767, loss=39.457, backward_time=0.294, grad_norm=40.517, clip=100.000, loss_scale=6.136e+33, optim_step_time=0.093, optim0_lr0=1.530e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 14:34:28,733 (trainer:762) INFO: 43epoch:train:10801-10900batch: iter_time=1.002e-04, forward_time=0.337, loss_ctc=54.714, loss_att=49.208, acc=0.773, loss=50.860, backward_time=0.300, grad_norm=43.513, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.530e-04, train_time=1.448 +[gpub010:0/16] 2024-02-11 14:36:35,568 (trainer:762) INFO: 43epoch:train:10901-11000batch: iter_time=8.688e-05, forward_time=0.351, loss_ctc=41.409, loss_att=40.741, acc=0.773, loss=40.941, backward_time=0.314, grad_norm=42.215, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.530e-04, train_time=1.269 +[gpub010:0/16] 2024-02-11 14:38:44,133 (trainer:762) INFO: 43epoch:train:11001-11100batch: iter_time=9.318e-05, forward_time=0.292, loss_ctc=46.437, loss_att=47.892, acc=0.766, loss=47.455, backward_time=0.297, grad_norm=44.185, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.530e-04, train_time=1.285 +[gpub010:0/16] 2024-02-11 14:40:46,539 (trainer:762) INFO: 43epoch:train:11101-11200batch: iter_time=1.062e-04, forward_time=0.305, loss_ctc=40.246, loss_att=36.221, acc=0.776, loss=37.428, backward_time=0.294, grad_norm=39.763, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.530e-04, train_time=1.223 +[gpub010:0/16] 2024-02-11 14:42:40,049 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-02-11 14:42:59,327 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 14:43:02,872 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 14:43:02,872 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-02-11 14:43:02,919 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 14:48:57,058 (trainer:762) INFO: 43epoch:train:11201-11300batch: iter_time=3.260, forward_time=0.382, loss_ctc=44.245, loss_att=48.067, acc=0.752, loss=46.920, backward_time=0.327, grad_norm=43.746, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.530e-04, train_time=4.906 +[gpub010:0/16] 2024-02-11 14:51:16,086 (trainer:762) INFO: 43epoch:train:11301-11400batch: iter_time=2.465e-04, forward_time=0.304, loss_ctc=50.569, loss_att=55.055, acc=0.749, loss=53.710, backward_time=0.298, grad_norm=48.658, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.530e-04, train_time=1.390 +[gpub010:0/16] 2024-02-11 14:53:35,857 (trainer:762) INFO: 43epoch:train:11401-11500batch: iter_time=7.773e-05, forward_time=0.385, loss_ctc=48.873, loss_att=44.778, acc=0.778, loss=46.007, backward_time=0.315, grad_norm=45.256, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=1.529e-04, train_time=1.397 +[gpub010:0/16] 2024-02-11 14:56:00,752 (trainer:762) 
INFO: 43epoch:train:11501-11600batch: iter_time=8.311e-05, forward_time=0.290, loss_ctc=43.341, loss_att=38.331, acc=0.768, loss=39.834, backward_time=0.294, grad_norm=41.379, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.529e-04, train_time=1.449 +[gpub010:0/16] 2024-02-11 14:58:04,203 (trainer:762) INFO: 43epoch:train:11601-11700batch: iter_time=5.015e-04, forward_time=0.305, loss_ctc=59.402, loss_att=52.667, acc=0.745, loss=54.688, backward_time=0.298, grad_norm=53.867, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.529e-04, train_time=1.234 +[gpub010:0/16] 2024-02-11 15:00:14,703 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 15:00:35,436 (trainer:762) INFO: 43epoch:train:11701-11800batch: iter_time=8.530e-05, forward_time=0.404, loss_ctc=47.252, loss_att=50.426, acc=0.771, loss=49.474, backward_time=0.325, grad_norm=44.031, clip=100.000, loss_scale=4.930e+33, optim_step_time=0.105, optim0_lr0=1.529e-04, train_time=1.512 +[gpub010:0/16] 2024-02-11 15:02:32,619 (trainer:762) INFO: 43epoch:train:11801-11900batch: iter_time=8.440e-05, forward_time=0.294, loss_ctc=41.904, loss_att=40.998, acc=0.796, loss=41.270, backward_time=0.297, grad_norm=39.374, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.529e-04, train_time=1.172 +[gpub010:0/16] 2024-02-11 15:04:39,049 (trainer:762) INFO: 43epoch:train:11901-12000batch: iter_time=7.954e-05, forward_time=0.303, loss_ctc=41.216, loss_att=37.099, acc=0.791, loss=38.334, backward_time=0.296, grad_norm=41.301, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.529e-04, train_time=1.264 +[gpub010:0/16] 2024-02-11 15:07:01,804 (trainer:762) INFO: 43epoch:train:12001-12100batch: iter_time=8.587e-05, forward_time=0.313, loss_ctc=45.317, loss_att=41.667, acc=0.781, loss=42.762, backward_time=0.323, grad_norm=42.379, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, 
optim0_lr0=1.529e-04, train_time=1.426 +[gpub010:0/16] 2024-02-11 15:09:11,809 (trainer:762) INFO: 43epoch:train:12101-12200batch: iter_time=8.249e-05, forward_time=0.316, loss_ctc=49.948, loss_att=49.739, acc=0.769, loss=49.802, backward_time=0.300, grad_norm=44.000, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.529e-04, train_time=1.300 +[gpub010:0/16] 2024-02-11 15:11:36,769 (trainer:762) INFO: 43epoch:train:12201-12300batch: iter_time=8.425e-05, forward_time=0.346, loss_ctc=40.700, loss_att=44.380, acc=0.774, loss=43.276, backward_time=0.338, grad_norm=42.684, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.528e-04, train_time=1.450 +[gpub010:0/16] 2024-02-11 15:13:49,628 (trainer:762) INFO: 43epoch:train:12301-12400batch: iter_time=8.638e-05, forward_time=0.291, loss_ctc=43.909, loss_att=42.779, acc=0.767, loss=43.118, backward_time=0.296, grad_norm=45.126, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.528e-04, train_time=1.327 +[gpub010:0/16] 2024-02-11 15:16:12,462 (trainer:762) INFO: 43epoch:train:12401-12500batch: iter_time=7.940e-05, forward_time=0.418, loss_ctc=40.503, loss_att=37.471, acc=0.788, loss=38.381, backward_time=0.336, grad_norm=38.052, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.528e-04, train_time=1.428 +[gpub010:0/16] 2024-02-11 15:16:32,693 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-02-11 15:16:51,753 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 15:16:55,399 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 15:16:55,399 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-02-11 15:16:55,444 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 15:23:53,647 (trainer:762) INFO: 43epoch:train:12501-12600batch: iter_time=3.177, forward_time=0.293, loss_ctc=52.550, loss_att=58.162, acc=0.744, loss=56.478, backward_time=0.299, grad_norm=52.239, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.528e-04, train_time=4.612 +[gpub010:0/16] 2024-02-11 15:25:53,540 (trainer:762) INFO: 43epoch:train:12601-12700batch: iter_time=7.888e-05, forward_time=0.309, loss_ctc=47.079, loss_att=45.264, acc=0.765, loss=45.808, backward_time=0.303, grad_norm=46.695, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.528e-04, train_time=1.198 +[gpub010:0/16] 2024-02-11 15:28:38,280 (trainer:762) INFO: 43epoch:train:12701-12800batch: iter_time=8.230e-05, forward_time=0.385, loss_ctc=44.504, loss_att=41.584, acc=0.774, loss=42.460, backward_time=0.310, grad_norm=38.637, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.101, optim0_lr0=1.528e-04, train_time=1.647 +[gpub010:0/16] 2024-02-11 15:30:49,703 (trainer:762) 
INFO: 43epoch:train:12801-12900batch: iter_time=8.731e-05, forward_time=0.296, loss_ctc=55.273, loss_att=47.818, acc=0.759, loss=50.055, backward_time=0.307, grad_norm=48.947, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.528e-04, train_time=1.314 +[gpub010:0/16] 2024-02-11 15:33:28,559 (trainer:762) INFO: 43epoch:train:12901-13000batch: iter_time=8.248e-05, forward_time=0.387, loss_ctc=48.376, loss_att=45.835, acc=0.766, loss=46.597, backward_time=0.318, grad_norm=48.444, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.528e-04, train_time=1.588 +[gpub010:0/16] 2024-02-11 15:35:19,185 (trainer:762) INFO: 43epoch:train:13001-13100batch: iter_time=8.095e-05, forward_time=0.293, loss_ctc=46.610, loss_att=48.332, acc=0.780, loss=47.815, backward_time=0.300, grad_norm=48.308, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.527e-04, train_time=1.106 +[gpub010:0/16] 2024-02-11 15:37:33,398 (trainer:762) INFO: 43epoch:train:13101-13200batch: iter_time=8.158e-05, forward_time=0.328, loss_ctc=40.635, loss_att=36.780, acc=0.800, loss=37.937, backward_time=0.306, grad_norm=39.018, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.527e-04, train_time=1.342 +[gpub010:0/16] 2024-02-11 15:40:06,763 (trainer:762) INFO: 43epoch:train:13201-13300batch: iter_time=8.348e-05, forward_time=0.372, loss_ctc=39.934, loss_att=38.714, acc=0.775, loss=39.080, backward_time=0.307, grad_norm=40.410, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=1.527e-04, train_time=1.533 +[gpub010:0/16] 2024-02-11 15:42:06,880 (trainer:762) INFO: 43epoch:train:13301-13400batch: iter_time=1.345e-04, forward_time=0.317, loss_ctc=54.195, loss_att=50.400, acc=0.779, loss=51.539, backward_time=0.302, grad_norm=44.121, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.527e-04, train_time=1.200 +[gpub010:0/16] 2024-02-11 15:44:47,291 (trainer:762) INFO: 
43epoch:train:13401-13500batch: iter_time=8.669e-05, forward_time=0.398, loss_ctc=40.975, loss_att=40.782, acc=0.776, loss=40.839, backward_time=0.338, grad_norm=41.049, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.527e-04, train_time=1.605 +[gpub010:0/16] 2024-02-11 15:47:03,319 (trainer:762) INFO: 43epoch:train:13501-13600batch: iter_time=3.467e-04, forward_time=0.298, loss_ctc=46.054, loss_att=49.200, acc=0.767, loss=48.256, backward_time=0.299, grad_norm=44.749, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.095, optim0_lr0=1.527e-04, train_time=1.360 +[gpub010:0/16] 2024-02-11 15:49:18,289 (trainer:762) INFO: 43epoch:train:13601-13700batch: iter_time=8.107e-05, forward_time=0.294, loss_ctc=39.811, loss_att=36.613, acc=0.784, loss=37.573, backward_time=0.296, grad_norm=40.619, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.527e-04, train_time=1.349 +[gpub010:0/16] 2024-02-11 15:50:59,435 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-02-11 15:51:18,836 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 15:51:22,405 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 15:51:22,405 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-02-11 15:51:22,453 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 15:57:31,001 (trainer:762) INFO: 43epoch:train:13701-13800batch: iter_time=3.324, forward_time=0.376, loss_ctc=44.512, loss_att=47.556, acc=0.757, loss=46.643, backward_time=0.313, grad_norm=44.703, clip=100.000, loss_scale=2.856e+33, optim_step_time=0.102, optim0_lr0=1.527e-04, train_time=4.928 +[gpub010:0/16] 2024-02-11 15:59:27,863 (trainer:762) INFO: 43epoch:train:13801-13900batch: iter_time=8.479e-05, forward_time=0.302, loss_ctc=50.409, loss_att=52.817, acc=0.755, loss=52.095, backward_time=0.298, grad_norm=47.985, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.527e-04, train_time=1.168 +[gpub010:0/16] 2024-02-11 16:01:58,944 (trainer:762) INFO: 43epoch:train:13901-14000batch: iter_time=8.288e-05, forward_time=0.344, loss_ctc=48.536, loss_att=44.250, acc=0.778, loss=45.536, backward_time=0.338, grad_norm=45.660, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.526e-04, train_time=1.511 +[gpub010:0/16] 2024-02-11 16:04:06,731 
(trainer:762) INFO: 43epoch:train:14001-14100batch: iter_time=8.913e-05, forward_time=0.291, loss_ctc=43.377, loss_att=38.426, acc=0.765, loss=39.911, backward_time=0.296, grad_norm=42.101, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.526e-04, train_time=1.278 +[gpub010:0/16] 2024-02-11 16:06:49,738 (trainer:762) INFO: 43epoch:train:14101-14200batch: iter_time=8.701e-05, forward_time=0.366, loss_ctc=59.084, loss_att=51.203, acc=0.750, loss=53.567, backward_time=0.324, grad_norm=54.702, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.526e-04, train_time=1.630 +[gpub010:0/16] 2024-02-11 16:08:46,862 (trainer:762) INFO: 43epoch:train:14201-14300batch: iter_time=8.378e-05, forward_time=0.293, loss_ctc=47.286, loss_att=50.218, acc=0.773, loss=49.339, backward_time=0.299, grad_norm=47.369, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.526e-04, train_time=1.171 +[gpub010:0/16] 2024-02-11 16:11:13,702 (trainer:762) INFO: 43epoch:train:14301-14400batch: iter_time=8.008e-05, forward_time=0.291, loss_ctc=41.679, loss_att=40.994, acc=0.795, loss=41.200, backward_time=0.297, grad_norm=40.776, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.526e-04, train_time=1.468 +[gpub010:0/16] 2024-02-11 16:13:49,405 (trainer:762) INFO: 43epoch:train:14401-14500batch: iter_time=4.736e-04, forward_time=0.394, loss_ctc=41.189, loss_att=36.767, acc=0.793, loss=38.094, backward_time=0.338, grad_norm=42.406, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.526e-04, train_time=1.557 +[gpub010:0/16] 2024-02-11 16:16:00,845 (trainer:762) INFO: 43epoch:train:14501-14600batch: iter_time=8.321e-05, forward_time=0.291, loss_ctc=45.793, loss_att=41.779, acc=0.782, loss=42.983, backward_time=0.296, grad_norm=41.756, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.526e-04, train_time=1.314 +[gpub010:0/16] 2024-02-11 16:18:44,557 (trainer:762) INFO: 
43epoch:train:14601-14700batch: iter_time=8.082e-05, forward_time=0.376, loss_ctc=49.636, loss_att=48.926, acc=0.770, loss=49.139, backward_time=0.313, grad_norm=44.173, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.526e-04, train_time=1.637 +[gpub010:0/16] 2024-02-11 16:20:48,374 (trainer:762) INFO: 43epoch:train:14701-14800batch: iter_time=8.814e-05, forward_time=0.294, loss_ctc=40.631, loss_att=45.063, acc=0.770, loss=43.733, backward_time=0.299, grad_norm=39.913, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.525e-04, train_time=1.238 +[gpub010:0/16] 2024-02-11 16:23:08,456 (trainer:762) INFO: 43epoch:train:14801-14900batch: iter_time=8.024e-05, forward_time=0.296, loss_ctc=44.375, loss_att=42.433, acc=0.768, loss=43.016, backward_time=0.296, grad_norm=43.389, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.525e-04, train_time=1.400 +[gpub010:0/16] 2024-02-11 16:25:28,929 (trainer:762) INFO: 43epoch:train:14901-15000batch: iter_time=0.020, forward_time=0.340, loss_ctc=40.728, loss_att=37.536, acc=0.787, loss=38.493, backward_time=0.365, grad_norm=37.445, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.525e-04, train_time=1.405 +[gpub010:0/16] 2024-02-11 17:03:32,602 (trainer:361) INFO: 43epoch results: [train] iter_time=0.258, forward_time=0.329, loss_ctc=46.884, loss_att=44.899, acc=0.767, loss=45.494, backward_time=0.310, grad_norm=44.884, clip=100.000, loss_scale=5.931e+33, optim_step_time=0.096, optim0_lr0=1.534e-04, train_time=1.603, time=6 hours, 41 minutes and 15.29 seconds, total_count=675000, gpu_max_cached_mem_GB=42.092, [valid] loss_ctc=35.196, cer_ctc=0.178, loss_att=36.829, acc=0.707, cer=0.251, wer=0.984, loss=36.339, time=37 minutes and 39.48 seconds, total_count=210195, gpu_max_cached_mem_GB=42.092 +[gpub010:0/16] 2024-02-11 17:03:42,418 (trainer:416) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub010:0/16] 2024-02-11 
17:03:42,478 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/32epoch.pth, exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/38epoch.pth +[gpub010:0/16] 2024-02-11 17:03:42,479 (trainer:290) INFO: 44/45epoch started. Estimated time to finish: 14 hours, 35 minutes and 32.51 seconds +[gpub010:0/16] 2024-02-11 17:03:42,498 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub010:0/16] 2024-02-11 17:04:01,008 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 17:04:04,401 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 17:04:04,401 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-02-11 17:04:04,404 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 17:10:31,840 (trainer:762) INFO: 44epoch:train:1-100batch: iter_time=2.917, forward_time=0.362, loss_ctc=42.799, loss_att=39.156, acc=0.773, loss=40.249, backward_time=0.303, grad_norm=42.901, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.525e-04, train_time=4.093 +[gpub010:0/16] 2024-02-11 17:12:42,915 (trainer:762) INFO: 44epoch:train:101-200batch: iter_time=8.346e-05, forward_time=0.292, loss_ctc=45.987, loss_att=45.411, acc=0.768, loss=45.584, 
backward_time=0.298, grad_norm=42.098, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.525e-04, train_time=1.311 +[gpub010:0/16] 2024-02-11 17:15:02,702 (trainer:762) INFO: 44epoch:train:201-300batch: iter_time=8.391e-05, forward_time=0.298, loss_ctc=46.829, loss_att=50.097, acc=0.750, loss=49.117, backward_time=0.301, grad_norm=44.905, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.525e-04, train_time=1.398 +[gpub010:0/16] 2024-02-11 17:17:22,740 (trainer:762) INFO: 44epoch:train:301-400batch: iter_time=8.153e-05, forward_time=0.358, loss_ctc=46.632, loss_att=52.906, acc=0.741, loss=51.023, backward_time=0.330, grad_norm=46.320, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.525e-04, train_time=1.400 +[gpub010:0/16] 2024-02-11 17:19:35,133 (trainer:762) INFO: 44epoch:train:401-500batch: iter_time=8.032e-05, forward_time=0.290, loss_ctc=40.703, loss_att=42.654, acc=0.756, loss=42.068, backward_time=0.294, grad_norm=49.156, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.525e-04, train_time=1.324 +[gpub010:0/16] 2024-02-11 17:21:44,184 (trainer:762) INFO: 44epoch:train:501-600batch: iter_time=9.087e-05, forward_time=0.297, loss_ctc=49.250, loss_att=49.794, acc=0.758, loss=49.631, backward_time=0.298, grad_norm=49.705, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.525e-04, train_time=1.290 +[gpub010:0/16] 2024-02-11 17:24:00,084 (trainer:762) INFO: 44epoch:train:601-700batch: iter_time=8.215e-05, forward_time=0.352, loss_ctc=49.503, loss_att=47.287, acc=0.760, loss=47.951, backward_time=0.313, grad_norm=48.567, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.524e-04, train_time=1.359 +[gpub010:0/16] 2024-02-11 17:26:28,665 (trainer:762) INFO: 44epoch:train:701-800batch: iter_time=8.546e-05, forward_time=0.304, loss_ctc=46.366, loss_att=41.617, acc=0.760, loss=43.042, backward_time=0.302, grad_norm=55.972, clip=100.000, 
loss_scale=5.712e+33, optim_step_time=0.093, optim0_lr0=1.524e-04, train_time=1.485 +[gpub010:0/16] 2024-02-11 17:27:27,894 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 17:28:23,593 (trainer:762) INFO: 44epoch:train:801-900batch: iter_time=8.765e-05, forward_time=0.313, loss_ctc=55.443, loss_att=56.391, acc=0.743, loss=56.106, backward_time=0.300, grad_norm=55.095, clip=100.000, loss_scale=7.972e+33, optim_step_time=0.093, optim0_lr0=1.524e-04, train_time=1.149 +[gpub010:0/16] 2024-02-11 17:30:44,173 (trainer:762) INFO: 44epoch:train:901-1000batch: iter_time=8.753e-05, forward_time=0.292, loss_ctc=45.339, loss_att=41.765, acc=0.761, loss=42.837, backward_time=0.300, grad_norm=42.318, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.524e-04, train_time=1.406 +[gpub010:0/16] 2024-02-11 17:32:55,772 (trainer:762) INFO: 44epoch:train:1001-1100batch: iter_time=5.823e-04, forward_time=0.374, loss_ctc=42.082, loss_att=35.638, acc=0.779, loss=37.571, backward_time=0.338, grad_norm=41.359, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.524e-04, train_time=1.316 +[gpub010:0/16] 2024-02-11 17:35:14,097 (trainer:762) INFO: 44epoch:train:1101-1200batch: iter_time=9.392e-05, forward_time=0.291, loss_ctc=44.691, loss_att=46.995, acc=0.734, loss=46.304, backward_time=0.295, grad_norm=49.025, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.524e-04, train_time=1.383 +[gpub010:0/16] 2024-02-11 17:36:41,300 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub010:0/16] 2024-02-11 17:37:00,238 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 17:37:03,768 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 17:37:03,768 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-02-11 17:37:03,771 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 17:42:28,974 (trainer:762) INFO: 44epoch:train:1201-1300batch: iter_time=2.931, forward_time=0.295, loss_ctc=47.750, loss_att=44.035, acc=0.768, loss=45.150, backward_time=0.298, grad_norm=44.451, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.524e-04, train_time=4.349 +[gpub010:0/16] 2024-02-11 17:45:03,029 (trainer:762) INFO: 44epoch:train:1301-1400batch: iter_time=8.486e-05, forward_time=0.376, loss_ctc=44.576, loss_att=44.946, acc=0.769, loss=44.835, backward_time=0.312, grad_norm=44.280, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.524e-04, train_time=1.540 +[gpub010:0/16] 2024-02-11 17:47:32,783 (trainer:762) INFO: 44epoch:train:1401-1500batch: iter_time=8.059e-05, forward_time=0.292, loss_ctc=42.577, loss_att=41.445, acc=0.762, loss=41.785, backward_time=0.296, grad_norm=37.994, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.523e-04, train_time=1.497 +[gpub010:0/16] 2024-02-11 17:49:45,235 (trainer:762) INFO: 
44epoch:train:1501-1600batch: iter_time=9.362e-05, forward_time=0.296, loss_ctc=47.906, loss_att=56.842, acc=0.739, loss=54.161, backward_time=0.302, grad_norm=46.475, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.523e-04, train_time=1.324 +[gpub010:0/16] 2024-02-11 17:51:47,834 (trainer:762) INFO: 44epoch:train:1601-1700batch: iter_time=8.386e-05, forward_time=0.290, loss_ctc=42.679, loss_att=44.629, acc=0.752, loss=44.044, backward_time=0.297, grad_norm=76.684, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.523e-04, train_time=1.226 +[gpub010:0/16] 2024-02-11 17:54:01,403 (trainer:762) INFO: 44epoch:train:1701-1800batch: iter_time=8.625e-05, forward_time=0.372, loss_ctc=45.292, loss_att=47.686, acc=0.766, loss=46.968, backward_time=0.323, grad_norm=46.834, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.523e-04, train_time=1.335 +[gpub010:0/16] 2024-02-11 17:56:18,209 (trainer:762) INFO: 44epoch:train:1801-1900batch: iter_time=9.177e-05, forward_time=0.291, loss_ctc=49.701, loss_att=46.187, acc=0.759, loss=47.241, backward_time=0.295, grad_norm=50.355, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.523e-04, train_time=1.369 +[gpub010:0/16] 2024-02-11 17:58:20,093 (trainer:762) INFO: 44epoch:train:1901-2000batch: iter_time=8.726e-05, forward_time=0.296, loss_ctc=42.088, loss_att=39.677, acc=0.773, loss=40.400, backward_time=0.299, grad_norm=44.226, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.523e-04, train_time=1.219 +[gpub010:0/16] 2024-02-11 18:00:30,257 (trainer:762) INFO: 44epoch:train:2001-2100batch: iter_time=9.044e-05, forward_time=0.318, loss_ctc=47.438, loss_att=45.740, acc=0.760, loss=46.249, backward_time=0.319, grad_norm=47.917, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.523e-04, train_time=1.301 +[gpub010:0/16] 2024-02-11 18:02:44,246 (trainer:762) INFO: 44epoch:train:2101-2200batch: 
iter_time=8.739e-05, forward_time=0.293, loss_ctc=53.148, loss_att=54.325, acc=0.745, loss=53.972, backward_time=0.297, grad_norm=50.109, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.523e-04, train_time=1.338 +[gpub010:0/16] 2024-02-11 18:04:38,013 (trainer:762) INFO: 44epoch:train:2201-2300batch: iter_time=8.497e-05, forward_time=0.294, loss_ctc=42.643, loss_att=36.868, acc=0.774, loss=38.600, backward_time=0.300, grad_norm=40.058, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.523e-04, train_time=1.138 +[gpub010:0/16] 2024-02-11 18:06:05,730 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 18:07:25,447 (trainer:762) INFO: 44epoch:train:2301-2400batch: iter_time=8.712e-05, forward_time=0.340, loss_ctc=44.025, loss_att=41.447, acc=0.751, loss=42.220, backward_time=0.354, grad_norm=49.500, clip=100.000, loss_scale=3.960e+33, optim_step_time=0.099, optim0_lr0=1.522e-04, train_time=1.675 +[gpub010:0/16] 2024-02-11 18:09:46,693 (trainer:762) INFO: 44epoch:train:2401-2500batch: iter_time=8.172e-05, forward_time=0.291, loss_ctc=47.522, loss_att=49.157, acc=0.754, loss=48.666, backward_time=0.297, grad_norm=44.302, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.522e-04, train_time=1.412 +[gpub010:0/16] 2024-02-11 18:10:06,722 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub010:0/16] 2024-02-11 18:10:26,049 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 18:10:29,612 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 18:10:29,613 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-02-11 18:10:29,616 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 18:17:42,114 (trainer:762) INFO: 44epoch:train:2501-2600batch: iter_time=3.261, forward_time=0.295, loss_ctc=41.218, loss_att=40.836, acc=0.765, loss=40.951, backward_time=0.296, grad_norm=42.098, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.522e-04, train_time=4.754 +[gpub010:0/16] 2024-02-11 18:20:11,651 (trainer:762) INFO: 44epoch:train:2601-2700batch: iter_time=7.940e-05, forward_time=0.427, loss_ctc=44.991, loss_att=43.456, acc=0.772, loss=43.916, backward_time=0.320, grad_norm=40.824, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.522e-04, train_time=1.495 +[gpub010:0/16] 2024-02-11 18:22:19,656 (trainer:762) INFO: 44epoch:train:2701-2800batch: iter_time=7.918e-05, forward_time=0.291, loss_ctc=45.769, loss_att=50.305, acc=0.746, loss=48.944, backward_time=0.297, grad_norm=45.665, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.522e-04, train_time=1.280 +[gpub010:0/16] 2024-02-11 18:24:42,117 (trainer:762) INFO: 
44epoch:train:2801-2900batch: iter_time=8.352e-05, forward_time=0.296, loss_ctc=45.617, loss_att=51.307, acc=0.741, loss=49.600, backward_time=0.299, grad_norm=44.035, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.522e-04, train_time=1.423 +[gpub010:0/16] 2024-02-11 18:27:07,580 (trainer:762) INFO: 44epoch:train:2901-3000batch: iter_time=8.320e-05, forward_time=0.381, loss_ctc=39.300, loss_att=42.394, acc=0.755, loss=41.466, backward_time=0.344, grad_norm=45.668, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.104, optim0_lr0=1.522e-04, train_time=1.456 +[gpub010:0/16] 2024-02-11 18:29:34,692 (trainer:762) INFO: 44epoch:train:3001-3100batch: iter_time=7.869e-05, forward_time=0.295, loss_ctc=47.739, loss_att=48.956, acc=0.755, loss=48.591, backward_time=0.296, grad_norm=49.806, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.522e-04, train_time=1.471 +[gpub010:0/16] 2024-02-11 18:31:26,690 (trainer:762) INFO: 44epoch:train:3101-3200batch: iter_time=8.261e-05, forward_time=0.293, loss_ctc=48.161, loss_att=46.557, acc=0.757, loss=47.038, backward_time=0.299, grad_norm=46.853, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.521e-04, train_time=1.119 +[gpub010:0/16] 2024-02-11 18:34:08,198 (trainer:762) INFO: 44epoch:train:3201-3300batch: iter_time=8.379e-05, forward_time=0.425, loss_ctc=44.824, loss_att=40.770, acc=0.756, loss=41.987, backward_time=0.312, grad_norm=50.429, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.521e-04, train_time=1.615 +[gpub010:0/16] 2024-02-11 18:36:26,662 (trainer:762) INFO: 44epoch:train:3301-3400batch: iter_time=8.308e-05, forward_time=0.297, loss_ctc=53.826, loss_att=52.892, acc=0.744, loss=53.172, backward_time=0.303, grad_norm=50.520, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.521e-04, train_time=1.384 +[gpub010:0/16] 2024-02-11 18:38:15,147 (trainer:762) INFO: 44epoch:train:3401-3500batch: 
iter_time=8.779e-05, forward_time=0.292, loss_ctc=44.093, loss_att=41.983, acc=0.753, loss=42.616, backward_time=0.300, grad_norm=40.714, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.521e-04, train_time=1.084 +[gpub010:0/16] 2024-02-11 18:40:57,219 (trainer:762) INFO: 44epoch:train:3501-3600batch: iter_time=8.502e-05, forward_time=0.381, loss_ctc=41.201, loss_att=35.902, acc=0.772, loss=37.492, backward_time=0.320, grad_norm=41.683, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.105, optim0_lr0=1.521e-04, train_time=1.621 +[gpub010:0/16] 2024-02-11 18:42:45,122 (trainer:762) INFO: 44epoch:train:3601-3700batch: iter_time=8.350e-05, forward_time=0.290, loss_ctc=43.440, loss_att=45.885, acc=0.738, loss=45.151, backward_time=0.297, grad_norm=50.452, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.521e-04, train_time=1.079 +[gpub010:0/16] 2024-02-11 18:44:18,107 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub010:0/16] 2024-02-11 18:44:37,233 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 18:44:40,779 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 18:44:40,779 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-02-11 18:44:40,835 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 18:50:13,936 
(trainer:762) INFO: 44epoch:train:3701-3800batch: iter_time=3.099, forward_time=0.397, loss_ctc=47.618, loss_att=44.151, acc=0.766, loss=45.191, backward_time=0.316, grad_norm=43.564, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.521e-04, train_time=4.488 +[gpub010:0/16] 2024-02-11 18:52:20,818 (trainer:762) INFO: 44epoch:train:3801-3900batch: iter_time=8.016e-05, forward_time=0.294, loss_ctc=44.758, loss_att=46.625, acc=0.766, loss=46.065, backward_time=0.298, grad_norm=46.345, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.521e-04, train_time=1.269 +[gpub010:0/16] 2024-02-11 18:54:37,314 (trainer:762) INFO: 44epoch:train:3901-4000batch: iter_time=8.166e-05, forward_time=0.292, loss_ctc=42.532, loss_att=42.262, acc=0.764, loss=42.343, backward_time=0.297, grad_norm=40.798, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.521e-04, train_time=1.365 +[gpub010:0/16] 2024-02-11 18:56:40,051 (trainer:762) INFO: 44epoch:train:4001-4100batch: iter_time=8.024e-05, forward_time=0.383, loss_ctc=47.644, loss_att=56.955, acc=0.742, loss=54.161, backward_time=0.336, grad_norm=46.220, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.098, optim0_lr0=1.520e-04, train_time=1.227 +[gpub010:0/16] 2024-02-11 18:58:42,501 (trainer:762) INFO: 44epoch:train:4101-4200batch: iter_time=7.985e-05, forward_time=0.295, loss_ctc=41.862, loss_att=44.252, acc=0.754, loss=43.535, backward_time=0.296, grad_norm=45.679, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.520e-04, train_time=1.224 +[gpub010:0/16] 2024-02-11 19:01:11,875 (trainer:762) INFO: 44epoch:train:4201-4300batch: iter_time=8.403e-05, forward_time=0.292, loss_ctc=44.785, loss_att=47.015, acc=0.770, loss=46.346, backward_time=0.296, grad_norm=45.993, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.520e-04, train_time=1.494 +[gpub010:0/16] 2024-02-11 19:03:03,340 (trainer:762) INFO: 
44epoch:train:4301-4400batch: iter_time=8.141e-05, forward_time=0.293, loss_ctc=48.798, loss_att=45.922, acc=0.761, loss=46.785, backward_time=0.301, grad_norm=45.628, clip=100.000, loss_scale=3.816e+33, optim_step_time=0.095, optim0_lr0=1.520e-04, train_time=1.114 +[gpub010:0/16] 2024-02-11 19:05:04,615 (trainer:762) INFO: 44epoch:train:4401-4500batch: iter_time=8.044e-05, forward_time=0.367, loss_ctc=42.249, loss_att=39.725, acc=0.774, loss=40.482, backward_time=0.320, grad_norm=47.080, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.520e-04, train_time=1.213 +[gpub010:0/16] 2024-02-11 19:07:36,420 (trainer:762) INFO: 44epoch:train:4501-4600batch: iter_time=8.873e-05, forward_time=0.295, loss_ctc=47.347, loss_att=45.756, acc=0.761, loss=46.234, backward_time=0.298, grad_norm=46.014, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.520e-04, train_time=1.518 +[gpub010:0/16] 2024-02-11 19:09:28,377 (trainer:762) INFO: 44epoch:train:4601-4700batch: iter_time=8.076e-05, forward_time=0.318, loss_ctc=51.897, loss_att=53.880, acc=0.747, loss=53.285, backward_time=0.305, grad_norm=52.430, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.520e-04, train_time=1.119 +[gpub010:0/16] 2024-02-11 19:11:26,968 (trainer:762) INFO: 44epoch:train:4701-4800batch: iter_time=9.281e-05, forward_time=0.354, loss_ctc=42.171, loss_att=36.797, acc=0.776, loss=38.409, backward_time=0.327, grad_norm=40.970, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.520e-04, train_time=1.185 +[gpub010:0/16] 2024-02-11 19:14:03,063 (trainer:762) INFO: 44epoch:train:4801-4900batch: iter_time=8.766e-05, forward_time=0.290, loss_ctc=43.599, loss_att=40.698, acc=0.754, loss=41.568, backward_time=0.294, grad_norm=45.796, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.519e-04, train_time=1.561 +[gpub010:0/16] 2024-02-11 19:15:58,141 (trainer:762) INFO: 44epoch:train:4901-5000batch: 
iter_time=8.259e-05, forward_time=0.293, loss_ctc=47.202, loss_att=48.794, acc=0.755, loss=48.317, backward_time=0.300, grad_norm=46.474, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.519e-04, train_time=1.151 +[gpub010:0/16] 2024-02-11 19:16:18,210 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub010:0/16] 2024-02-11 19:16:38,027 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 19:16:41,547 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 19:16:41,547 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-02-11 19:16:41,551 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 19:23:50,231 (trainer:762) INFO: 44epoch:train:5001-5100batch: iter_time=3.289, forward_time=0.385, loss_ctc=40.913, loss_att=39.938, acc=0.768, loss=40.230, backward_time=0.314, grad_norm=43.466, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.519e-04, train_time=4.721 +[gpub010:0/16] 2024-02-11 19:26:09,021 (trainer:762) INFO: 44epoch:train:5101-5200batch: iter_time=8.061e-05, forward_time=0.292, loss_ctc=44.807, loss_att=42.674, acc=0.775, loss=43.314, backward_time=0.297, grad_norm=41.255, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.519e-04, train_time=1.388 +[gpub010:0/16] 2024-02-11 19:28:11,265 
(trainer:762) INFO: 44epoch:train:5201-5300batch: iter_time=8.461e-05, forward_time=0.354, loss_ctc=45.268, loss_att=49.139, acc=0.750, loss=47.978, backward_time=0.332, grad_norm=44.110, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.519e-04, train_time=1.222 +[gpub010:0/16] 2024-02-11 19:30:29,776 (trainer:762) INFO: 44epoch:train:5301-5400batch: iter_time=8.461e-05, forward_time=0.290, loss_ctc=44.984, loss_att=50.273, acc=0.744, loss=48.686, backward_time=0.295, grad_norm=43.653, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.519e-04, train_time=1.384 +[gpub010:0/16] 2024-02-11 19:32:33,709 (trainer:762) INFO: 44epoch:train:5401-5500batch: iter_time=8.879e-05, forward_time=0.290, loss_ctc=38.881, loss_att=41.408, acc=0.761, loss=40.650, backward_time=0.294, grad_norm=45.600, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.519e-04, train_time=1.239 +[gpub010:0/16] 2024-02-11 19:34:59,972 (trainer:762) INFO: 44epoch:train:5501-5600batch: iter_time=3.777e-04, forward_time=0.407, loss_ctc=47.549, loss_att=49.155, acc=0.755, loss=48.673, backward_time=0.320, grad_norm=47.685, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.109, optim0_lr0=1.519e-04, train_time=1.462 +[gpub010:0/16] 2024-02-11 19:37:02,010 (trainer:762) INFO: 44epoch:train:5601-5700batch: iter_time=8.248e-05, forward_time=0.292, loss_ctc=47.594, loss_att=46.285, acc=0.759, loss=46.678, backward_time=0.297, grad_norm=49.103, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.519e-04, train_time=1.220 +[gpub010:0/16] 2024-02-11 19:39:05,930 (trainer:762) INFO: 44epoch:train:5701-5800batch: iter_time=8.280e-05, forward_time=0.290, loss_ctc=45.003, loss_att=40.656, acc=0.757, loss=41.960, backward_time=0.294, grad_norm=46.761, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.518e-04, train_time=1.239 +[gpub010:0/16] 2024-02-11 19:41:26,115 (trainer:762) INFO: 
44epoch:train:5801-5900batch: iter_time=8.710e-05, forward_time=0.383, loss_ctc=52.728, loss_att=53.076, acc=0.745, loss=52.972, backward_time=0.337, grad_norm=48.805, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.518e-04, train_time=1.402 +[gpub010:0/16] 2024-02-11 19:43:41,112 (trainer:762) INFO: 44epoch:train:5901-6000batch: iter_time=8.114e-05, forward_time=0.291, loss_ctc=43.874, loss_att=41.821, acc=0.754, loss=42.437, backward_time=0.294, grad_norm=44.065, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.518e-04, train_time=1.349 +[gpub010:0/16] 2024-02-11 19:45:40,085 (trainer:762) INFO: 44epoch:train:6001-6100batch: iter_time=8.066e-05, forward_time=0.341, loss_ctc=40.776, loss_att=35.924, acc=0.773, loss=37.380, backward_time=0.337, grad_norm=41.156, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.518e-04, train_time=1.190 +[gpub010:0/16] 2024-02-11 19:46:50,409 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 19:47:52,069 (trainer:762) INFO: 44epoch:train:6101-6200batch: iter_time=8.522e-05, forward_time=0.291, loss_ctc=42.988, loss_att=45.035, acc=0.743, loss=44.421, backward_time=0.295, grad_norm=49.889, clip=100.000, loss_scale=4.222e+33, optim_step_time=0.093, optim0_lr0=1.518e-04, train_time=1.320 +[gpub010:0/16] 2024-02-11 19:49:24,862 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub010:0/16] 2024-02-11 19:49:44,273 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 19:49:47,797 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 19:49:47,797 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-02-11 19:49:47,852 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 19:54:39,844 (trainer:762) INFO: 44epoch:train:6201-6300batch: iter_time=2.757, forward_time=0.291, loss_ctc=46.826, loss_att=43.552, acc=0.765, loss=44.534, backward_time=0.297, grad_norm=41.854, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.518e-04, train_time=4.078 +[gpub010:0/16] 2024-02-11 19:56:44,559 (trainer:762) INFO: 44epoch:train:6301-6400batch: iter_time=8.142e-05, forward_time=0.406, loss_ctc=44.126, loss_att=43.065, acc=0.768, loss=43.384, backward_time=0.333, grad_norm=44.976, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.518e-04, train_time=1.247 +[gpub010:0/16] 2024-02-11 19:59:11,816 (trainer:762) INFO: 44epoch:train:6401-6500batch: iter_time=8.124e-05, forward_time=0.291, loss_ctc=42.132, loss_att=40.602, acc=0.765, loss=41.061, backward_time=0.295, grad_norm=40.235, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.518e-04, train_time=1.472 +[gpub010:0/16] 2024-02-11 20:01:08,518 (trainer:762) INFO: 
44epoch:train:6501-6600batch: iter_time=8.241e-05, forward_time=0.315, loss_ctc=47.045, loss_att=54.292, acc=0.737, loss=52.118, backward_time=0.300, grad_norm=44.244, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.517e-04, train_time=1.167 +[gpub010:0/16] 2024-02-11 20:03:43,139 (trainer:762) INFO: 44epoch:train:6601-6700batch: iter_time=8.460e-05, forward_time=0.372, loss_ctc=41.707, loss_att=43.752, acc=0.752, loss=43.139, backward_time=0.325, grad_norm=45.756, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.517e-04, train_time=1.546 +[gpub010:0/16] 2024-02-11 20:05:46,103 (trainer:762) INFO: 44epoch:train:6701-6800batch: iter_time=8.220e-05, forward_time=0.291, loss_ctc=44.429, loss_att=46.567, acc=0.767, loss=45.925, backward_time=0.297, grad_norm=46.559, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.517e-04, train_time=1.230 +[gpub010:0/16] 2024-02-11 20:07:46,469 (trainer:762) INFO: 44epoch:train:6801-6900batch: iter_time=8.345e-05, forward_time=0.289, loss_ctc=48.759, loss_att=46.349, acc=0.754, loss=47.072, backward_time=0.295, grad_norm=47.456, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.517e-04, train_time=1.203 +[gpub010:0/16] 2024-02-11 20:09:50,816 (trainer:762) INFO: 44epoch:train:6901-7000batch: iter_time=7.807e-05, forward_time=0.290, loss_ctc=41.365, loss_att=38.848, acc=0.768, loss=39.603, backward_time=0.295, grad_norm=46.870, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.517e-04, train_time=1.243 +[gpub010:0/16] 2024-02-11 20:12:12,444 (trainer:762) INFO: 44epoch:train:7001-7100batch: iter_time=2.265e-04, forward_time=0.335, loss_ctc=46.462, loss_att=44.047, acc=0.755, loss=44.771, backward_time=0.366, grad_norm=45.647, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.096, optim0_lr0=1.517e-04, train_time=1.415 +[gpub010:0/16] 2024-02-11 20:14:01,670 (trainer:762) INFO: 44epoch:train:7101-7200batch: 
iter_time=8.109e-05, forward_time=0.293, loss_ctc=51.395, loss_att=51.802, acc=0.741, loss=51.680, backward_time=0.298, grad_norm=49.783, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.517e-04, train_time=1.093 +[gpub010:0/16] 2024-02-11 20:16:33,616 (trainer:762) INFO: 44epoch:train:7201-7300batch: iter_time=8.478e-05, forward_time=0.290, loss_ctc=41.686, loss_att=37.734, acc=0.763, loss=38.919, backward_time=0.294, grad_norm=41.276, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.517e-04, train_time=1.519 +[gpub010:0/16] 2024-02-11 20:18:39,552 (trainer:762) INFO: 44epoch:train:7301-7400batch: iter_time=8.977e-05, forward_time=0.356, loss_ctc=43.236, loss_att=39.933, acc=0.755, loss=40.924, backward_time=0.351, grad_norm=45.539, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.099, optim0_lr0=1.517e-04, train_time=1.259 +[gpub010:0/16] 2024-02-11 20:20:53,811 (trainer:762) INFO: 44epoch:train:7401-7500batch: iter_time=8.131e-05, forward_time=0.292, loss_ctc=47.019, loss_att=48.163, acc=0.750, loss=47.820, backward_time=0.297, grad_norm=43.323, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.516e-04, train_time=1.341 +[gpub010:0/16] 2024-02-11 20:21:14,012 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub010:0/16] 2024-02-11 20:21:33,384 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 20:21:36,888 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 20:21:36,889 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-02-11 20:21:36,892 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 20:28:51,873 (trainer:762) INFO: 44epoch:train:7501-7600batch: iter_time=3.454, forward_time=0.318, loss_ctc=40.588, loss_att=38.209, acc=0.773, loss=38.923, backward_time=0.344, grad_norm=42.350, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.097, optim0_lr0=1.516e-04, train_time=4.781 +[gpub010:0/16] 2024-02-11 20:31:18,258 (trainer:762) INFO: 44epoch:train:7601-7700batch: iter_time=7.848e-05, forward_time=0.337, loss_ctc=44.652, loss_att=41.969, acc=0.777, loss=42.774, backward_time=0.297, grad_norm=38.356, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.516e-04, train_time=1.464 +[gpub010:0/16] 2024-02-11 20:33:11,322 (trainer:762) INFO: 44epoch:train:7701-7800batch: iter_time=8.135e-05, forward_time=0.294, loss_ctc=45.047, loss_att=48.156, acc=0.753, loss=47.223, backward_time=0.301, grad_norm=42.935, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.093, optim0_lr0=1.516e-04, train_time=1.129 +[gpub010:0/16] 2024-02-11 20:36:03,576 (trainer:762) INFO: 
44epoch:train:7801-7900batch: iter_time=8.231e-05, forward_time=0.369, loss_ctc=44.782, loss_att=50.438, acc=0.744, loss=48.741, backward_time=0.321, grad_norm=43.646, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.103, optim0_lr0=1.516e-04, train_time=1.724 +[gpub010:0/16] 2024-02-11 20:37:51,002 (trainer:762) INFO: 44epoch:train:7901-8000batch: iter_time=8.450e-05, forward_time=0.290, loss_ctc=38.907, loss_att=41.679, acc=0.760, loss=40.848, backward_time=0.296, grad_norm=45.288, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.516e-04, train_time=1.074 +[gpub010:0/16] 2024-02-11 20:39:59,587 (trainer:762) INFO: 44epoch:train:8001-8100batch: iter_time=8.760e-05, forward_time=0.299, loss_ctc=46.833, loss_att=48.227, acc=0.758, loss=47.809, backward_time=0.298, grad_norm=48.038, clip=100.000, loss_scale=2.596e+33, optim_step_time=0.094, optim0_lr0=1.516e-04, train_time=1.285 +[gpub010:0/16] 2024-02-11 20:42:37,939 (trainer:762) INFO: 44epoch:train:8101-8200batch: iter_time=8.052e-05, forward_time=0.401, loss_ctc=48.116, loss_att=45.807, acc=0.762, loss=46.500, backward_time=0.338, grad_norm=45.296, clip=100.000, loss_scale=3.557e+33, optim_step_time=0.099, optim0_lr0=1.516e-04, train_time=1.584 +[gpub010:0/16] 2024-02-11 20:44:26,836 (trainer:762) INFO: 44epoch:train:8201-8300batch: iter_time=8.072e-05, forward_time=0.290, loss_ctc=44.586, loss_att=40.029, acc=0.760, loss=41.396, backward_time=0.296, grad_norm=52.678, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.516e-04, train_time=1.089 +[gpub010:0/16] 2024-02-11 20:46:30,668 (trainer:762) INFO: 44epoch:train:8301-8400batch: iter_time=8.539e-05, forward_time=0.296, loss_ctc=52.588, loss_att=51.954, acc=0.748, loss=52.145, backward_time=0.301, grad_norm=47.934, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.515e-04, train_time=1.238 +[gpub010:0/16] 2024-02-11 20:49:00,595 (trainer:762) INFO: 44epoch:train:8401-8500batch: 
iter_time=1.922e-04, forward_time=0.344, loss_ctc=43.677, loss_att=41.182, acc=0.756, loss=41.931, backward_time=0.344, grad_norm=41.574, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.515e-04, train_time=1.499 +[gpub010:0/16] 2024-02-11 20:51:08,247 (trainer:762) INFO: 44epoch:train:8501-8600batch: iter_time=8.380e-05, forward_time=0.291, loss_ctc=40.639, loss_att=35.476, acc=0.776, loss=37.025, backward_time=0.295, grad_norm=38.988, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.515e-04, train_time=1.276 +[gpub010:0/16] 2024-02-11 20:53:16,099 (trainer:762) INFO: 44epoch:train:8601-8700batch: iter_time=8.092e-05, forward_time=0.296, loss_ctc=42.875, loss_att=45.431, acc=0.743, loss=44.664, backward_time=0.298, grad_norm=48.761, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.515e-04, train_time=1.278 +[gpub010:0/16] 2024-02-11 20:54:33,023 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub010:0/16] 2024-02-11 20:54:52,818 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 20:54:56,412 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 20:54:56,412 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-02-11 20:54:56,415 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 21:00:20,937 
(trainer:762) INFO: 44epoch:train:8701-8800batch: iter_time=3.000, forward_time=0.390, loss_ctc=47.027, loss_att=43.790, acc=0.765, loss=44.761, backward_time=0.314, grad_norm=41.678, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.515e-04, train_time=4.249 +[gpub010:0/16] 2024-02-11 21:02:08,634 (trainer:762) INFO: 44epoch:train:8801-8900batch: iter_time=8.142e-05, forward_time=0.291, loss_ctc=43.452, loss_att=42.450, acc=0.772, loss=42.750, backward_time=0.297, grad_norm=43.419, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.515e-04, train_time=1.076 +[gpub010:0/16] 2024-02-11 21:04:37,115 (trainer:762) INFO: 44epoch:train:8901-9000batch: iter_time=2.377e-04, forward_time=0.375, loss_ctc=41.521, loss_att=40.303, acc=0.767, loss=40.668, backward_time=0.309, grad_norm=38.710, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.515e-04, train_time=1.485 +[gpub010:0/16] 2024-02-11 21:06:41,702 (trainer:762) INFO: 44epoch:train:9001-9100batch: iter_time=8.478e-05, forward_time=0.291, loss_ctc=47.450, loss_att=54.385, acc=0.738, loss=52.305, backward_time=0.297, grad_norm=46.816, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.515e-04, train_time=1.246 +[gpub010:0/16] 2024-02-11 21:08:51,024 (trainer:762) INFO: 44epoch:train:9101-9200batch: iter_time=8.314e-05, forward_time=0.291, loss_ctc=41.226, loss_att=43.952, acc=0.752, loss=43.134, backward_time=0.300, grad_norm=46.937, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.514e-04, train_time=1.293 +[gpub010:0/16] 2024-02-11 21:11:02,319 (trainer:762) INFO: 44epoch:train:9201-9300batch: iter_time=8.300e-05, forward_time=0.343, loss_ctc=43.777, loss_att=46.101, acc=0.769, loss=45.404, backward_time=0.356, grad_norm=45.751, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.514e-04, train_time=1.313 +[gpub010:0/16] 2024-02-11 21:13:14,205 (trainer:762) INFO: 
44epoch:train:9301-9400batch: iter_time=8.237e-05, forward_time=0.290, loss_ctc=48.021, loss_att=45.403, acc=0.758, loss=46.188, backward_time=0.294, grad_norm=48.104, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.514e-04, train_time=1.319 +[gpub010:0/16] 2024-02-11 21:15:05,573 (trainer:762) INFO: 44epoch:train:9401-9500batch: iter_time=8.137e-05, forward_time=0.295, loss_ctc=41.624, loss_att=38.564, acc=0.772, loss=39.482, backward_time=0.302, grad_norm=47.132, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.514e-04, train_time=1.113 +[gpub010:0/16] 2024-02-11 21:17:28,664 (trainer:762) INFO: 44epoch:train:9501-9600batch: iter_time=2.759e-04, forward_time=0.326, loss_ctc=46.236, loss_att=43.265, acc=0.759, loss=44.157, backward_time=0.324, grad_norm=44.294, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.514e-04, train_time=1.431 +[gpub010:0/16] 2024-02-11 21:19:32,020 (trainer:762) INFO: 44epoch:train:9601-9700batch: iter_time=8.067e-05, forward_time=0.292, loss_ctc=51.175, loss_att=51.973, acc=0.740, loss=51.733, backward_time=0.296, grad_norm=49.000, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.514e-04, train_time=1.234 +[gpub010:0/16] 2024-02-11 21:22:07,433 (trainer:762) INFO: 44epoch:train:9701-9800batch: iter_time=9.051e-05, forward_time=0.411, loss_ctc=41.541, loss_att=37.018, acc=0.767, loss=38.375, backward_time=0.319, grad_norm=40.895, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.514e-04, train_time=1.554 +[gpub010:0/16] 2024-02-11 21:23:59,287 (trainer:762) INFO: 44epoch:train:9801-9900batch: iter_time=7.926e-05, forward_time=0.289, loss_ctc=42.966, loss_att=39.543, acc=0.757, loss=40.570, backward_time=0.295, grad_norm=49.893, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.514e-04, train_time=1.118 +[gpub010:0/16] 2024-02-11 21:26:17,040 (trainer:762) INFO: 44epoch:train:9901-10000batch: 
iter_time=8.059e-05, forward_time=0.294, loss_ctc=46.641, loss_att=48.180, acc=0.749, loss=47.718, backward_time=0.304, grad_norm=44.384, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.514e-04, train_time=1.378 +[gpub010:0/16] 2024-02-11 21:26:37,085 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub010:0/16] 2024-02-11 21:26:56,451 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 21:27:00,015 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 21:27:00,015 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-02-11 21:27:00,056 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 21:33:55,199 (trainer:762) INFO: 44epoch:train:10001-10100batch: iter_time=3.209, forward_time=0.390, loss_ctc=40.285, loss_att=39.358, acc=0.781, loss=39.636, backward_time=0.310, grad_norm=41.063, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.513e-04, train_time=4.581 +[gpub010:0/16] 2024-02-11 21:35:58,968 (trainer:762) INFO: 44epoch:train:10101-10200batch: iter_time=8.533e-05, forward_time=0.292, loss_ctc=44.321, loss_att=45.139, acc=0.774, loss=44.893, backward_time=0.297, grad_norm=39.648, clip=100.000, loss_scale=7.113e+33, optim_step_time=0.093, optim0_lr0=1.513e-04, train_time=1.238 +[gpub010:0/16] 2024-02-11 21:38:13,897 
(trainer:762) INFO: 44epoch:train:10201-10300batch: iter_time=8.043e-05, forward_time=0.344, loss_ctc=45.231, loss_att=50.387, acc=0.754, loss=48.840, backward_time=0.390, grad_norm=44.830, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.513e-04, train_time=1.349 +[gpub010:0/16] 2024-02-11 21:40:15,252 (trainer:762) INFO: 44epoch:train:10301-10400batch: iter_time=8.699e-05, forward_time=0.292, loss_ctc=44.762, loss_att=53.109, acc=0.745, loss=50.605, backward_time=0.297, grad_norm=46.679, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.513e-04, train_time=1.213 +[gpub010:0/16] 2024-02-11 21:42:31,199 (trainer:762) INFO: 44epoch:train:10401-10500batch: iter_time=8.482e-05, forward_time=0.299, loss_ctc=38.166, loss_att=41.618, acc=0.766, loss=40.582, backward_time=0.300, grad_norm=43.592, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.513e-04, train_time=1.359 +[gpub010:0/16] 2024-02-11 21:45:00,159 (trainer:762) INFO: 44epoch:train:10501-10600batch: iter_time=0.003, forward_time=0.321, loss_ctc=47.337, loss_att=48.620, acc=0.766, loss=48.235, backward_time=0.334, grad_norm=45.864, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.513e-04, train_time=1.490 +[gpub010:0/16] 2024-02-11 21:46:48,353 (trainer:762) INFO: 44epoch:train:10601-10700batch: iter_time=8.956e-05, forward_time=0.292, loss_ctc=47.165, loss_att=46.337, acc=0.770, loss=46.585, backward_time=0.298, grad_norm=44.970, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.513e-04, train_time=1.082 +[gpub010:0/16] 2024-02-11 21:49:02,672 (trainer:762) INFO: 44epoch:train:10701-10800batch: iter_time=9.196e-05, forward_time=0.298, loss_ctc=44.306, loss_att=40.686, acc=0.769, loss=41.772, backward_time=0.295, grad_norm=47.200, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.513e-04, train_time=1.342 +[gpub010:0/16] 2024-02-11 21:51:22,138 (trainer:762) INFO: 
44epoch:train:10801-10900batch: iter_time=8.867e-05, forward_time=0.347, loss_ctc=52.144, loss_att=55.224, acc=0.752, loss=54.300, backward_time=0.369, grad_norm=51.073, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.513e-04, train_time=1.395 +[gpub010:0/16] 2024-02-11 21:52:51,467 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-11 21:53:12,933 (trainer:762) INFO: 44epoch:train:10901-11000batch: iter_time=8.591e-05, forward_time=0.292, loss_ctc=43.408, loss_att=41.811, acc=0.764, loss=42.290, backward_time=0.298, grad_norm=42.023, clip=100.000, loss_scale=9.336e+33, optim_step_time=0.094, optim0_lr0=1.512e-04, train_time=1.108 +[gpub010:0/16] 2024-02-11 21:55:50,359 (trainer:762) INFO: 44epoch:train:11001-11100batch: iter_time=8.454e-05, forward_time=0.304, loss_ctc=40.677, loss_att=35.633, acc=0.783, loss=37.146, backward_time=0.296, grad_norm=39.579, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.512e-04, train_time=1.573 +[gpub010:0/16] 2024-02-11 21:57:51,064 (trainer:762) INFO: 44epoch:train:11101-11200batch: iter_time=2.969e-04, forward_time=0.362, loss_ctc=42.053, loss_att=45.017, acc=0.747, loss=44.128, backward_time=0.318, grad_norm=49.062, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.512e-04, train_time=1.207 +[gpub010:0/16] 2024-02-11 21:59:05,433 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-02-11 21:59:25,152 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 21:59:28,771 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 21:59:28,771 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-02-11 21:59:28,777 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 22:04:49,982 (trainer:762) INFO: 44epoch:train:11201-11300batch: iter_time=2.873, forward_time=0.400, loss_ctc=46.945, loss_att=44.299, acc=0.771, loss=45.093, backward_time=0.321, grad_norm=40.679, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.512e-04, train_time=4.189 +[gpub010:0/16] 2024-02-11 22:07:03,446 (trainer:762) INFO: 44epoch:train:11301-11400batch: iter_time=8.298e-05, forward_time=0.290, loss_ctc=43.544, loss_att=44.054, acc=0.767, loss=43.901, backward_time=0.294, grad_norm=42.783, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.512e-04, train_time=1.335 +[gpub010:0/16] 2024-02-11 22:09:15,643 (trainer:762) INFO: 44epoch:train:11401-11500batch: iter_time=8.097e-05, forward_time=0.371, loss_ctc=41.675, loss_att=40.651, acc=0.767, loss=40.959, backward_time=0.332, grad_norm=39.014, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.512e-04, train_time=1.322 +[gpub010:0/16] 2024-02-11 22:11:33,505 (trainer:762) 
INFO: 44epoch:train:11501-11600batch: iter_time=8.396e-05, forward_time=0.292, loss_ctc=46.359, loss_att=54.436, acc=0.740, loss=52.013, backward_time=0.298, grad_norm=46.232, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.512e-04, train_time=1.379 +[gpub010:0/16] 2024-02-11 22:13:49,929 (trainer:762) INFO: 44epoch:train:11601-11700batch: iter_time=8.539e-05, forward_time=0.374, loss_ctc=41.326, loss_att=43.098, acc=0.755, loss=42.566, backward_time=0.318, grad_norm=45.776, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.512e-04, train_time=1.364 +[gpub010:0/16] 2024-02-11 22:16:04,200 (trainer:762) INFO: 44epoch:train:11701-11800batch: iter_time=8.902e-05, forward_time=0.320, loss_ctc=43.637, loss_att=46.774, acc=0.766, loss=45.833, backward_time=0.309, grad_norm=47.677, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.511e-04, train_time=1.343 +[gpub010:0/16] 2024-02-11 22:18:12,425 (trainer:762) INFO: 44epoch:train:11801-11900batch: iter_time=9.082e-05, forward_time=0.355, loss_ctc=48.160, loss_att=46.200, acc=0.757, loss=46.788, backward_time=0.307, grad_norm=45.989, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.106, optim0_lr0=1.511e-04, train_time=1.282 +[gpub010:0/16] 2024-02-11 22:20:32,782 (trainer:762) INFO: 44epoch:train:11901-12000batch: iter_time=8.431e-05, forward_time=0.382, loss_ctc=41.410, loss_att=38.732, acc=0.771, loss=39.535, backward_time=0.328, grad_norm=46.521, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.511e-04, train_time=1.403 +[gpub010:0/16] 2024-02-11 22:22:51,627 (trainer:762) INFO: 44epoch:train:12001-12100batch: iter_time=8.327e-05, forward_time=0.294, loss_ctc=46.625, loss_att=44.009, acc=0.757, loss=44.794, backward_time=0.297, grad_norm=44.219, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.511e-04, train_time=1.388 +[gpub010:0/16] 2024-02-11 22:25:06,948 (trainer:762) INFO: 
44epoch:train:12101-12200batch: iter_time=8.816e-05, forward_time=0.381, loss_ctc=50.655, loss_att=52.167, acc=0.743, loss=51.713, backward_time=0.324, grad_norm=47.459, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.103, optim0_lr0=1.511e-04, train_time=1.353 +[gpub010:0/16] 2024-02-11 22:27:06,321 (trainer:762) INFO: 44epoch:train:12201-12300batch: iter_time=8.483e-05, forward_time=0.291, loss_ctc=41.840, loss_att=37.826, acc=0.764, loss=39.031, backward_time=0.295, grad_norm=42.943, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.511e-04, train_time=1.193 +[gpub010:0/16] 2024-02-11 22:29:42,534 (trainer:762) INFO: 44epoch:train:12301-12400batch: iter_time=8.661e-05, forward_time=0.372, loss_ctc=42.835, loss_att=39.795, acc=0.759, loss=40.707, backward_time=0.316, grad_norm=43.655, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.511e-04, train_time=1.562 +[gpub010:0/16] 2024-02-11 22:31:45,933 (trainer:762) INFO: 44epoch:train:12401-12500batch: iter_time=7.760e-05, forward_time=0.293, loss_ctc=46.318, loss_att=47.830, acc=0.752, loss=47.377, backward_time=0.298, grad_norm=42.670, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.511e-04, train_time=1.234 +[gpub010:0/16] 2024-02-11 22:32:06,012 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-02-11 22:32:25,321 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 22:32:28,898 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 22:32:28,898 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-02-11 22:32:28,901 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 22:39:36,402 (trainer:762) INFO: 44epoch:train:12501-12600batch: iter_time=3.184, forward_time=0.385, loss_ctc=40.140, loss_att=38.574, acc=0.783, loss=39.044, backward_time=0.315, grad_norm=41.045, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.511e-04, train_time=4.704 +[gpub010:0/16] 2024-02-11 22:41:49,040 (trainer:762) INFO: 44epoch:train:12601-12700batch: iter_time=8.483e-05, forward_time=0.302, loss_ctc=44.305, loss_att=44.463, acc=0.775, loss=44.416, backward_time=0.299, grad_norm=39.882, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.510e-04, train_time=1.326 +[gpub010:0/16] 2024-02-11 22:43:52,535 (trainer:762) INFO: 44epoch:train:12701-12800batch: iter_time=8.735e-05, forward_time=0.317, loss_ctc=44.842, loss_att=49.896, acc=0.757, loss=48.380, backward_time=0.300, grad_norm=45.610, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.510e-04, train_time=1.235 +[gpub010:0/16] 2024-02-11 22:46:23,782 (trainer:762) 
INFO: 44epoch:train:12801-12900batch: iter_time=8.737e-05, forward_time=0.358, loss_ctc=44.791, loss_att=53.293, acc=0.745, loss=50.742, backward_time=0.318, grad_norm=47.293, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.510e-04, train_time=1.511 +[gpub010:0/16] 2024-02-11 22:48:34,994 (trainer:762) INFO: 44epoch:train:12901-13000batch: iter_time=9.141e-05, forward_time=0.307, loss_ctc=38.273, loss_att=41.675, acc=0.767, loss=40.655, backward_time=0.305, grad_norm=44.406, clip=100.000, loss_scale=6.231e+33, optim_step_time=0.106, optim0_lr0=1.510e-04, train_time=1.312 +[gpub010:0/16] 2024-02-11 22:50:47,821 (trainer:762) INFO: 44epoch:train:13001-13100batch: iter_time=8.706e-05, forward_time=0.297, loss_ctc=46.635, loss_att=48.165, acc=0.766, loss=47.706, backward_time=0.300, grad_norm=47.557, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.510e-04, train_time=1.328 +[gpub010:0/16] 2024-02-11 22:53:02,127 (trainer:762) INFO: 44epoch:train:13101-13200batch: iter_time=1.238e-04, forward_time=0.332, loss_ctc=47.468, loss_att=45.782, acc=0.770, loss=46.288, backward_time=0.320, grad_norm=47.193, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.510e-04, train_time=1.342 +[gpub010:0/16] 2024-02-11 22:54:59,657 (trainer:762) INFO: 44epoch:train:13201-13300batch: iter_time=8.985e-05, forward_time=0.306, loss_ctc=44.084, loss_att=40.741, acc=0.769, loss=41.744, backward_time=0.305, grad_norm=46.608, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.510e-04, train_time=1.175 +[gpub010:0/16] 2024-02-11 22:57:36,683 (trainer:762) INFO: 44epoch:train:13301-13400batch: iter_time=1.857e-04, forward_time=0.384, loss_ctc=52.365, loss_att=54.789, acc=0.753, loss=54.062, backward_time=0.315, grad_norm=48.220, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.510e-04, train_time=1.571 +[gpub010:0/16] 2024-02-11 22:59:29,508 (trainer:762) INFO: 
44epoch:train:13401-13500batch: iter_time=8.450e-05, forward_time=0.295, loss_ctc=42.846, loss_att=41.353, acc=0.767, loss=41.801, backward_time=0.312, grad_norm=40.556, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.510e-04, train_time=1.128 +[gpub010:0/16] 2024-02-11 23:01:50,237 (trainer:762) INFO: 44epoch:train:13501-13600batch: iter_time=8.394e-05, forward_time=0.301, loss_ctc=40.315, loss_att=35.387, acc=0.782, loss=36.866, backward_time=0.302, grad_norm=38.729, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.509e-04, train_time=1.407 +[gpub010:0/16] 2024-02-11 23:04:03,710 (trainer:762) INFO: 44epoch:train:13601-13700batch: iter_time=9.865e-05, forward_time=0.339, loss_ctc=42.009, loss_att=45.023, acc=0.748, loss=44.119, backward_time=0.335, grad_norm=48.181, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.509e-04, train_time=1.335 +[gpub010:0/16] 2024-02-11 23:05:27,313 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-02-11 23:05:46,761 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-11 23:05:50,390 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-11 23:05:50,390 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-02-11 23:05:50,394 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-11 23:11:46,461 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub010:0/16] 2024-02-11 23:12:02,535 (trainer:762) INFO: 44epoch:train:13701-13800batch: iter_time=3.530, forward_time=0.334, loss_ctc=46.759, loss_att=42.978, acc=0.779, loss=44.112, backward_time=0.308, grad_norm=39.947, clip=100.000, loss_scale=9.598e+33, optim_step_time=0.095, optim0_lr0=1.509e-04, train_time=4.788 +[gpub010:0/16] 2024-02-11 23:14:12,127 (trainer:762) INFO: 44epoch:train:13801-13900batch: iter_time=7.978e-05, forward_time=0.309, loss_ctc=43.105, loss_att=44.062, acc=0.776, loss=43.775, backward_time=0.299, grad_norm=41.674, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.509e-04, train_time=1.296 +[gpub010:0/16] 2024-02-11 23:16:35,739 (trainer:762) INFO: 44epoch:train:13901-14000batch: iter_time=8.466e-05, forward_time=0.341, loss_ctc=41.397, loss_att=41.429, acc=0.767, loss=41.419, backward_time=0.340, grad_norm=39.713, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.105, optim0_lr0=1.509e-04, train_time=1.436 +[gpub010:0/16] 2024-02-11 23:18:27,665 (trainer:762) INFO: 44epoch:train:14001-14100batch: iter_time=8.525e-05, forward_time=0.293, loss_ctc=46.723, loss_att=56.273, acc=0.743, loss=53.408, backward_time=0.300, grad_norm=46.331, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.509e-04, train_time=1.119 +[gpub010:0/16] 2024-02-11 23:20:56,467 (trainer:762) INFO: 44epoch:train:14101-14200batch: iter_time=4.150e-04, forward_time=0.355, loss_ctc=40.847, loss_att=44.632, acc=0.756, loss=43.496, backward_time=0.344, grad_norm=46.728, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.100, optim0_lr0=1.509e-04, train_time=1.488 +[gpub010:0/16] 2024-02-11 23:22:56,966 (trainer:762) INFO: 44epoch:train:14201-14300batch: iter_time=8.550e-05, forward_time=0.291, loss_ctc=43.756, loss_att=45.919, acc=0.775, loss=45.270, backward_time=0.296, grad_norm=45.444, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.509e-04, train_time=1.205 +[gpub010:0/16] 2024-02-11 
23:24:59,544 (trainer:762) INFO: 44epoch:train:14301-14400batch: iter_time=8.318e-05, forward_time=0.313, loss_ctc=48.093, loss_att=46.228, acc=0.762, loss=46.788, backward_time=0.297, grad_norm=47.309, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.508e-04, train_time=1.225 +[gpub010:0/16] 2024-02-11 23:27:32,625 (trainer:762) INFO: 44epoch:train:14401-14500batch: iter_time=2.350e-04, forward_time=0.415, loss_ctc=40.957, loss_att=39.029, acc=0.780, loss=39.608, backward_time=0.322, grad_norm=42.860, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.508e-04, train_time=1.529 +[gpub010:0/16] 2024-02-11 23:29:26,193 (trainer:762) INFO: 44epoch:train:14501-14600batch: iter_time=8.052e-05, forward_time=0.293, loss_ctc=46.493, loss_att=44.862, acc=0.767, loss=45.351, backward_time=0.298, grad_norm=44.646, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.508e-04, train_time=1.137 +[gpub010:0/16] 2024-02-11 23:31:54,074 (trainer:762) INFO: 44epoch:train:14601-14700batch: iter_time=7.973e-05, forward_time=0.311, loss_ctc=50.494, loss_att=53.127, acc=0.751, loss=52.337, backward_time=0.308, grad_norm=48.463, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.508e-04, train_time=1.478 +[gpub010:0/16] 2024-02-11 23:34:08,506 (trainer:762) INFO: 44epoch:train:14701-14800batch: iter_time=0.001, forward_time=0.420, loss_ctc=41.356, loss_att=36.947, acc=0.775, loss=38.270, backward_time=0.330, grad_norm=39.112, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.508e-04, train_time=1.343 +[gpub010:0/16] 2024-02-11 23:36:13,813 (trainer:762) INFO: 44epoch:train:14801-14900batch: iter_time=8.372e-05, forward_time=0.304, loss_ctc=42.749, loss_att=40.399, acc=0.757, loss=41.104, backward_time=0.296, grad_norm=45.499, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.508e-04, train_time=1.254 +[gpub010:0/16] 2024-02-11 23:38:34,046 (trainer:762) INFO: 
44epoch:train:14901-15000batch: iter_time=3.777e-04, forward_time=0.366, loss_ctc=46.031, loss_att=47.283, acc=0.763, loss=46.907, backward_time=0.312, grad_norm=42.696, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.124, optim0_lr0=1.508e-04, train_time=1.402 +[gpub010:0/16] 2024-02-12 00:16:51,582 (trainer:361) INFO: 44epoch results: [train] iter_time=0.250, forward_time=0.324, loss_ctc=44.941, loss_att=44.930, acc=0.760, loss=44.933, backward_time=0.310, grad_norm=45.340, clip=100.000, loss_scale=5.083e+33, optim_step_time=0.096, optim0_lr0=1.516e-04, train_time=1.579, time=6 hours, 35 minutes and 15.43 seconds, total_count=690000, gpu_max_cached_mem_GB=42.092, [valid] loss_ctc=34.331, cer_ctc=0.174, loss_att=36.037, acc=0.702, cer=0.295, wer=0.989, loss=35.525, time=37 minutes and 53.4 seconds, total_count=214866, gpu_max_cached_mem_GB=42.092 +[gpub010:0/16] 2024-02-12 00:17:01,726 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-02-12 00:17:01,804 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/39epoch.pth +[gpub010:0/16] 2024-02-12 00:17:01,804 (trainer:290) INFO: 45/45epoch started. Estimated time to finish: 7 hours, 16 minutes and 52.87 seconds +[gpub010:0/16] 2024-02-12 00:17:01,815 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub010:0/16] 2024-02-12 00:17:20,448 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 00:17:23,878 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 00:17:23,878 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub010:0/16] 2024-02-12 00:17:23,881 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 00:24:14,665 (trainer:762) INFO: 45epoch:train:1-100batch: iter_time=3.087, forward_time=0.330, loss_ctc=45.604, loss_att=48.084, acc=0.760, loss=47.340, backward_time=0.304, grad_norm=43.861, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.508e-04, train_time=4.328 +[gpub010:0/16] 2024-02-12 00:26:09,193 (trainer:762) INFO: 45epoch:train:101-200batch: iter_time=8.028e-05, forward_time=0.297, loss_ctc=44.787, loss_att=39.089, acc=0.778, loss=40.798, backward_time=0.296, grad_norm=42.092, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.508e-04, train_time=1.145 +[gpub010:0/16] 2024-02-12 00:28:30,854 (trainer:762) INFO: 45epoch:train:201-300batch: iter_time=8.308e-05, forward_time=0.333, loss_ctc=49.519, loss_att=45.390, acc=0.762, loss=46.629, backward_time=0.316, grad_norm=43.125, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.507e-04, train_time=1.416 +[gpub010:0/16] 2024-02-12 00:30:40,845 (trainer:762) INFO: 
45epoch:train:301-400batch: iter_time=4.609e-04, forward_time=0.314, loss_ctc=37.336, loss_att=34.083, acc=0.779, loss=35.059, backward_time=0.308, grad_norm=40.330, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.507e-04, train_time=1.300 +[gpub010:0/16] 2024-02-12 00:32:55,496 (trainer:762) INFO: 45epoch:train:401-500batch: iter_time=7.967e-05, forward_time=0.307, loss_ctc=48.241, loss_att=47.416, acc=0.790, loss=47.664, backward_time=0.301, grad_norm=42.099, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.507e-04, train_time=1.346 +[gpub010:0/16] 2024-02-12 00:35:16,885 (trainer:762) INFO: 45epoch:train:501-600batch: iter_time=7.871e-05, forward_time=0.364, loss_ctc=62.130, loss_att=51.874, acc=0.763, loss=54.951, backward_time=0.334, grad_norm=51.393, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.107, optim0_lr0=1.507e-04, train_time=1.413 +[gpub010:0/16] 2024-02-12 00:37:21,283 (trainer:762) INFO: 45epoch:train:601-700batch: iter_time=8.397e-05, forward_time=0.303, loss_ctc=53.023, loss_att=46.606, acc=0.770, loss=48.531, backward_time=0.299, grad_norm=49.542, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.507e-04, train_time=1.244 +[gpub010:0/16] 2024-02-12 00:39:42,005 (trainer:762) INFO: 45epoch:train:701-800batch: iter_time=8.500e-05, forward_time=0.298, loss_ctc=51.596, loss_att=41.695, acc=0.768, loss=44.665, backward_time=0.299, grad_norm=48.494, clip=100.000, loss_scale=5.971e+33, optim_step_time=0.094, optim0_lr0=1.507e-04, train_time=1.407 +[gpub010:0/16] 2024-02-12 00:41:53,495 (trainer:762) INFO: 45epoch:train:801-900batch: iter_time=8.319e-05, forward_time=0.348, loss_ctc=46.238, loss_att=44.107, acc=0.760, loss=44.746, backward_time=0.330, grad_norm=43.670, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.507e-04, train_time=1.314 +[gpub010:0/16] 2024-02-12 00:44:05,405 (trainer:762) INFO: 45epoch:train:901-1000batch: iter_time=7.940e-05, 
forward_time=0.297, loss_ctc=44.598, loss_att=46.158, acc=0.774, loss=45.690, backward_time=0.297, grad_norm=40.350, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.507e-04, train_time=1.319 +[gpub010:0/16] 2024-02-12 00:46:10,212 (trainer:762) INFO: 45epoch:train:1001-1100batch: iter_time=2.265e-04, forward_time=0.315, loss_ctc=40.559, loss_att=44.004, acc=0.757, loss=42.970, backward_time=0.318, grad_norm=43.678, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.507e-04, train_time=1.248 +[gpub010:0/16] 2024-02-12 00:48:28,455 (trainer:762) INFO: 45epoch:train:1101-1200batch: iter_time=8.106e-05, forward_time=0.331, loss_ctc=51.074, loss_att=46.655, acc=0.744, loss=47.980, backward_time=0.316, grad_norm=55.772, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.506e-04, train_time=1.382 +[gpub010:0/16] 2024-02-12 00:49:49,681 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub010:0/16] 2024-02-12 00:50:08,823 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 00:50:12,410 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 00:50:12,410 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub010:0/16] 2024-02-12 00:50:12,432 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 00:55:10,674 (trainer:693) WARNING: 
The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-12 00:56:03,765 (trainer:762) INFO: 45epoch:train:1201-1300batch: iter_time=3.296, forward_time=0.333, loss_ctc=43.705, loss_att=49.027, acc=0.754, loss=47.430, backward_time=0.299, grad_norm=47.848, clip=100.000, loss_scale=7.815e+33, optim_step_time=0.094, optim0_lr0=1.506e-04, train_time=4.553 +[gpub010:0/16] 2024-02-12 00:58:15,896 (trainer:762) INFO: 45epoch:train:1301-1400batch: iter_time=8.014e-05, forward_time=0.326, loss_ctc=44.310, loss_att=43.992, acc=0.755, loss=44.088, backward_time=0.335, grad_norm=41.488, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.506e-04, train_time=1.321 +[gpub010:0/16] 2024-02-12 01:00:31,681 (trainer:762) INFO: 45epoch:train:1401-1500batch: iter_time=7.964e-05, forward_time=0.304, loss_ctc=46.981, loss_att=40.713, acc=0.770, loss=42.593, backward_time=0.294, grad_norm=43.719, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.506e-04, train_time=1.357 +[gpub010:0/16] 2024-02-12 01:02:46,070 (trainer:762) INFO: 45epoch:train:1501-1600batch: iter_time=8.389e-05, forward_time=0.294, loss_ctc=44.054, loss_att=39.542, acc=0.767, loss=40.896, backward_time=0.296, grad_norm=40.805, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.506e-04, train_time=1.344 +[gpub010:0/16] 2024-02-12 01:04:55,501 (trainer:762) INFO: 45epoch:train:1601-1700batch: iter_time=8.300e-05, forward_time=0.348, loss_ctc=41.551, loss_att=40.595, acc=0.776, loss=40.882, backward_time=0.315, grad_norm=40.095, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.506e-04, train_time=1.294 +[gpub010:0/16] 2024-02-12 01:07:09,510 (trainer:762) INFO: 45epoch:train:1701-1800batch: iter_time=8.059e-05, forward_time=0.297, loss_ctc=47.712, loss_att=49.038, acc=0.773, loss=48.640, backward_time=0.296, grad_norm=42.040, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.506e-04, 
train_time=1.340 +[gpub010:0/16] 2024-02-12 01:09:14,156 (trainer:762) INFO: 45epoch:train:1801-1900batch: iter_time=8.301e-05, forward_time=0.294, loss_ctc=59.287, loss_att=48.075, acc=0.761, loss=51.438, backward_time=0.297, grad_norm=49.589, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.506e-04, train_time=1.245 +[gpub010:0/16] 2024-02-12 01:11:33,817 (trainer:762) INFO: 45epoch:train:1901-2000batch: iter_time=8.374e-05, forward_time=0.324, loss_ctc=60.295, loss_att=47.483, acc=0.765, loss=51.327, backward_time=0.319, grad_norm=50.435, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.099, optim0_lr0=1.506e-04, train_time=1.397 +[gpub010:0/16] 2024-02-12 01:13:52,799 (trainer:762) INFO: 45epoch:train:2001-2100batch: iter_time=7.829e-05, forward_time=0.330, loss_ctc=46.907, loss_att=45.648, acc=0.738, loss=46.026, backward_time=0.323, grad_norm=45.888, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.505e-04, train_time=1.390 +[gpub010:0/16] 2024-02-12 01:16:17,791 (trainer:762) INFO: 45epoch:train:2101-2200batch: iter_time=7.858e-05, forward_time=0.300, loss_ctc=40.683, loss_att=39.308, acc=0.773, loss=39.721, backward_time=0.295, grad_norm=39.895, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.505e-04, train_time=1.450 +[gpub010:0/16] 2024-02-12 01:18:33,818 (trainer:762) INFO: 45epoch:train:2201-2300batch: iter_time=8.078e-05, forward_time=0.311, loss_ctc=42.477, loss_att=50.323, acc=0.749, loss=47.969, backward_time=0.303, grad_norm=41.856, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.505e-04, train_time=1.360 +[gpub010:0/16] 2024-02-12 01:20:53,167 (trainer:762) INFO: 45epoch:train:2301-2400batch: iter_time=7.921e-05, forward_time=0.353, loss_ctc=44.709, loss_att=40.898, acc=0.769, loss=42.041, backward_time=0.306, grad_norm=43.138, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.505e-04, train_time=1.394 +[gpub010:0/16] 2024-02-12 
01:22:59,959 (trainer:762) INFO: 45epoch:train:2401-2500batch: iter_time=7.740e-05, forward_time=0.301, loss_ctc=47.786, loss_att=46.282, acc=0.742, loss=46.733, backward_time=0.298, grad_norm=55.217, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.505e-04, train_time=1.268 +[gpub010:0/16] 2024-02-12 01:23:19,987 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub010:0/16] 2024-02-12 01:23:39,266 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 01:23:42,779 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 01:23:42,779 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub010:0/16] 2024-02-12 01:23:42,782 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 01:30:44,781 (trainer:762) INFO: 45epoch:train:2501-2600batch: iter_time=3.226, forward_time=0.321, loss_ctc=44.656, loss_att=48.345, acc=0.761, loss=47.239, backward_time=0.304, grad_norm=44.202, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.505e-04, train_time=4.648 +[gpub010:0/16] 2024-02-12 01:33:06,218 (trainer:762) INFO: 45epoch:train:2601-2700batch: iter_time=8.637e-05, forward_time=0.340, loss_ctc=42.463, loss_att=39.171, acc=0.781, loss=40.159, backward_time=0.299, grad_norm=40.676, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.113, optim0_lr0=1.505e-04, 
train_time=1.415 +[gpub010:0/16] 2024-02-12 01:35:15,702 (trainer:762) INFO: 45epoch:train:2701-2800batch: iter_time=8.265e-05, forward_time=0.319, loss_ctc=48.966, loss_att=44.679, acc=0.766, loss=45.965, backward_time=0.317, grad_norm=43.126, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.505e-04, train_time=1.294 +[gpub010:0/16] 2024-02-12 01:37:23,031 (trainer:762) INFO: 45epoch:train:2801-2900batch: iter_time=8.311e-05, forward_time=0.303, loss_ctc=36.657, loss_att=33.432, acc=0.783, loss=34.400, backward_time=0.338, grad_norm=38.932, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.505e-04, train_time=1.273 +[gpub010:0/16] 2024-02-12 01:39:28,922 (trainer:762) INFO: 45epoch:train:2901-3000batch: iter_time=9.182e-05, forward_time=0.304, loss_ctc=47.543, loss_att=46.937, acc=0.793, loss=47.119, backward_time=0.301, grad_norm=43.351, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.504e-04, train_time=1.259 +[gpub010:0/16] 2024-02-12 01:41:44,881 (trainer:762) INFO: 45epoch:train:3001-3100batch: iter_time=8.848e-05, forward_time=0.306, loss_ctc=59.059, loss_att=50.490, acc=0.766, loss=53.061, backward_time=0.319, grad_norm=50.087, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.504e-04, train_time=1.359 +[gpub010:0/16] 2024-02-12 01:43:43,171 (trainer:762) INFO: 45epoch:train:3101-3200batch: iter_time=8.549e-05, forward_time=0.360, loss_ctc=51.540, loss_att=46.209, acc=0.770, loss=47.808, backward_time=0.302, grad_norm=47.351, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.504e-04, train_time=1.183 +[gpub010:0/16] 2024-02-12 01:45:50,316 (trainer:762) INFO: 45epoch:train:3201-3300batch: iter_time=8.191e-05, forward_time=0.301, loss_ctc=50.793, loss_att=41.909, acc=0.770, loss=44.574, backward_time=0.298, grad_norm=52.275, clip=100.000, loss_scale=7.737e+33, optim_step_time=0.093, optim0_lr0=1.504e-04, train_time=1.271 +[gpub010:0/16] 2024-02-12 
01:48:08,638 (trainer:762) INFO: 45epoch:train:3301-3400batch: iter_time=8.353e-05, forward_time=0.309, loss_ctc=44.417, loss_att=44.162, acc=0.761, loss=44.238, backward_time=0.310, grad_norm=42.970, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.504e-04, train_time=1.383 +[gpub010:0/16] 2024-02-12 01:50:24,184 (trainer:762) INFO: 45epoch:train:3401-3500batch: iter_time=7.980e-05, forward_time=0.346, loss_ctc=43.814, loss_att=45.296, acc=0.779, loss=44.851, backward_time=0.325, grad_norm=40.057, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.504e-04, train_time=1.355 +[gpub010:0/16] 2024-02-12 01:52:24,471 (trainer:762) INFO: 45epoch:train:3501-3600batch: iter_time=8.131e-05, forward_time=0.296, loss_ctc=40.614, loss_att=44.727, acc=0.757, loss=43.493, backward_time=0.295, grad_norm=42.164, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.504e-04, train_time=1.203 +[gpub010:0/16] 2024-02-12 01:54:23,085 (trainer:762) INFO: 45epoch:train:3601-3700batch: iter_time=8.287e-05, forward_time=0.312, loss_ctc=49.919, loss_att=45.299, acc=0.750, loss=46.685, backward_time=0.331, grad_norm=51.689, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.504e-04, train_time=1.186 +[gpub010:0/16] 2024-02-12 01:56:05,424 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub010:0/16] 2024-02-12 01:56:25,251 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 01:56:28,775 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 01:56:28,775 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub010:0/16] 2024-02-12 01:56:28,779 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 02:02:04,929 (trainer:762) INFO: 45epoch:train:3701-3800batch: iter_time=3.122, forward_time=0.355, loss_ctc=42.378, loss_att=46.988, acc=0.762, loss=45.605, backward_time=0.333, grad_norm=44.322, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.503e-04, train_time=4.619 +[gpub010:0/16] 2024-02-12 02:03:58,344 (trainer:762) INFO: 45epoch:train:3801-3900batch: iter_time=7.555e-05, forward_time=0.310, loss_ctc=44.280, loss_att=43.136, acc=0.759, loss=43.479, backward_time=0.304, grad_norm=41.027, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.503e-04, train_time=1.133 +[gpub010:0/16] 2024-02-12 02:06:23,452 (trainer:762) INFO: 45epoch:train:3901-4000batch: iter_time=4.247e-04, forward_time=0.338, loss_ctc=46.335, loss_att=40.160, acc=0.775, loss=42.013, backward_time=0.298, grad_norm=43.416, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.503e-04, train_time=1.452 +[gpub010:0/16] 2024-02-12 02:08:23,796 (trainer:762) INFO: 
45epoch:train:4001-4100batch: iter_time=9.074e-05, forward_time=0.311, loss_ctc=43.299, loss_att=38.859, acc=0.773, loss=40.191, backward_time=0.311, grad_norm=41.349, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.503e-04, train_time=1.203 +[gpub010:0/16] 2024-02-12 02:10:23,909 (trainer:762) INFO: 45epoch:train:4101-4200batch: iter_time=8.300e-05, forward_time=0.365, loss_ctc=41.384, loss_att=40.581, acc=0.779, loss=40.822, backward_time=0.304, grad_norm=39.885, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.503e-04, train_time=1.201 +[gpub010:0/16] 2024-02-12 02:12:47,652 (trainer:762) INFO: 45epoch:train:4201-4300batch: iter_time=8.994e-05, forward_time=0.309, loss_ctc=47.280, loss_att=49.115, acc=0.776, loss=48.565, backward_time=0.308, grad_norm=42.302, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.503e-04, train_time=1.437 +[gpub010:0/16] 2024-02-12 02:14:44,508 (trainer:762) INFO: 45epoch:train:4301-4400batch: iter_time=8.348e-05, forward_time=0.303, loss_ctc=58.154, loss_att=47.639, acc=0.763, loss=50.793, backward_time=0.303, grad_norm=51.677, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.503e-04, train_time=1.168 +[gpub010:0/16] 2024-02-12 02:16:48,736 (trainer:762) INFO: 45epoch:train:4401-4500batch: iter_time=8.182e-05, forward_time=0.359, loss_ctc=59.124, loss_att=47.175, acc=0.767, loss=50.759, backward_time=0.333, grad_norm=53.827, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.503e-04, train_time=1.242 +[gpub010:0/16] 2024-02-12 02:18:58,823 (trainer:762) INFO: 45epoch:train:4501-4600batch: iter_time=8.133e-05, forward_time=0.297, loss_ctc=45.551, loss_att=45.253, acc=0.741, loss=45.342, backward_time=0.302, grad_norm=45.268, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.503e-04, train_time=1.301 +[gpub010:0/16] 2024-02-12 02:21:11,802 (trainer:762) INFO: 45epoch:train:4601-4700batch: 
iter_time=8.518e-05, forward_time=0.341, loss_ctc=40.152, loss_att=38.786, acc=0.776, loss=39.195, backward_time=0.370, grad_norm=40.190, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.502e-04, train_time=1.329 +[gpub010:0/16] 2024-02-12 02:23:02,583 (trainer:762) INFO: 45epoch:train:4701-4800batch: iter_time=8.384e-05, forward_time=0.292, loss_ctc=42.123, loss_att=49.637, acc=0.754, loss=47.383, backward_time=0.298, grad_norm=44.351, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.502e-04, train_time=1.108 +[gpub010:0/16] 2024-02-12 02:24:54,083 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-12 02:25:22,351 (trainer:762) INFO: 45epoch:train:4801-4900batch: iter_time=3.309e-04, forward_time=0.360, loss_ctc=43.438, loss_att=39.841, acc=0.773, loss=40.920, backward_time=0.321, grad_norm=43.323, clip=100.000, loss_scale=9.021e+33, optim_step_time=0.099, optim0_lr0=1.502e-04, train_time=1.397 +[gpub010:0/16] 2024-02-12 02:27:29,667 (trainer:762) INFO: 45epoch:train:4901-5000batch: iter_time=8.339e-05, forward_time=0.297, loss_ctc=47.415, loss_att=45.326, acc=0.745, loss=45.953, backward_time=0.304, grad_norm=54.482, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.502e-04, train_time=1.273 +[gpub010:0/16] 2024-02-12 02:27:49,697 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub010:0/16] 2024-02-12 02:28:09,198 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 02:28:12,687 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 02:28:12,687 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub010:0/16] 2024-02-12 02:28:12,690 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 02:35:02,151 (trainer:762) INFO: 45epoch:train:5001-5100batch: iter_time=3.218, forward_time=0.338, loss_ctc=44.368, loss_att=45.195, acc=0.760, loss=44.947, backward_time=0.300, grad_norm=41.737, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.502e-04, train_time=4.524 +[gpub010:0/16] 2024-02-12 02:37:09,467 (trainer:762) INFO: 45epoch:train:5101-5200batch: iter_time=7.788e-05, forward_time=0.301, loss_ctc=41.909, loss_att=38.559, acc=0.775, loss=39.564, backward_time=0.314, grad_norm=39.786, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.502e-04, train_time=1.272 +[gpub010:0/16] 2024-02-12 02:39:19,409 (trainer:762) INFO: 45epoch:train:5201-5300batch: iter_time=7.843e-05, forward_time=0.334, loss_ctc=48.495, loss_att=44.980, acc=0.757, loss=46.034, backward_time=0.300, grad_norm=41.957, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.502e-04, train_time=1.300 +[gpub010:0/16] 2024-02-12 02:41:30,132 (trainer:762) INFO: 
45epoch:train:5301-5400batch: iter_time=8.952e-05, forward_time=0.344, loss_ctc=36.429, loss_att=32.640, acc=0.788, loss=33.777, backward_time=0.301, grad_norm=38.535, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.502e-04, train_time=1.307 +[gpub010:0/16] 2024-02-12 02:43:43,731 (trainer:762) INFO: 45epoch:train:5401-5500batch: iter_time=8.200e-05, forward_time=0.294, loss_ctc=46.866, loss_att=46.437, acc=0.786, loss=46.566, backward_time=0.299, grad_norm=40.288, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.502e-04, train_time=1.336 +[gpub010:0/16] 2024-02-12 02:45:54,426 (trainer:762) INFO: 45epoch:train:5501-5600batch: iter_time=8.267e-05, forward_time=0.299, loss_ctc=58.393, loss_att=50.163, acc=0.763, loss=52.632, backward_time=0.301, grad_norm=50.926, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.501e-04, train_time=1.307 +[gpub010:0/16] 2024-02-12 02:48:03,621 (trainer:762) INFO: 45epoch:train:5601-5700batch: iter_time=8.668e-05, forward_time=0.381, loss_ctc=51.081, loss_att=45.773, acc=0.769, loss=47.365, backward_time=0.328, grad_norm=47.626, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.501e-04, train_time=1.292 +[gpub010:0/16] 2024-02-12 02:50:31,399 (trainer:762) INFO: 45epoch:train:5701-5800batch: iter_time=9.716e-05, forward_time=0.294, loss_ctc=50.090, loss_att=41.729, acc=0.767, loss=44.237, backward_time=0.297, grad_norm=49.147, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.501e-04, train_time=1.478 +[gpub010:0/16] 2024-02-12 02:52:38,933 (trainer:762) INFO: 45epoch:train:5801-5900batch: iter_time=9.016e-05, forward_time=0.292, loss_ctc=43.553, loss_att=42.748, acc=0.755, loss=42.990, backward_time=0.302, grad_norm=42.194, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.501e-04, train_time=1.275 +[gpub010:0/16] 2024-02-12 02:54:40,850 (trainer:762) INFO: 45epoch:train:5901-6000batch: 
iter_time=5.547e-04, forward_time=0.366, loss_ctc=43.742, loss_att=45.437, acc=0.771, loss=44.929, backward_time=0.306, grad_norm=41.358, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.101, optim0_lr0=1.501e-04, train_time=1.219 +[gpub010:0/16] 2024-02-12 02:57:09,201 (trainer:762) INFO: 45epoch:train:6001-6100batch: iter_time=8.671e-05, forward_time=0.294, loss_ctc=40.051, loss_att=42.942, acc=0.757, loss=42.075, backward_time=0.300, grad_norm=40.716, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.501e-04, train_time=1.484 +[gpub010:0/16] 2024-02-12 02:58:59,460 (trainer:762) INFO: 45epoch:train:6101-6200batch: iter_time=9.323e-05, forward_time=0.292, loss_ctc=49.614, loss_att=45.619, acc=0.745, loss=46.818, backward_time=0.298, grad_norm=54.248, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.501e-04, train_time=1.102 +[gpub010:0/16] 2024-02-12 03:00:32,023 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub010:0/16] 2024-02-12 03:00:51,088 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 03:00:54,893 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 03:00:54,893 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub010:0/16] 2024-02-12 03:00:54,896 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 03:06:34,043 
(trainer:762) INFO: 45epoch:train:6201-6300batch: iter_time=3.142, forward_time=0.401, loss_ctc=41.822, loss_att=46.113, acc=0.768, loss=44.826, backward_time=0.318, grad_norm=46.624, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.110, optim0_lr0=1.501e-04, train_time=4.546 +[gpub010:0/16] 2024-02-12 03:08:34,640 (trainer:762) INFO: 45epoch:train:6301-6400batch: iter_time=7.568e-05, forward_time=0.359, loss_ctc=43.848, loss_att=44.916, acc=0.769, loss=44.595, backward_time=0.335, grad_norm=40.345, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.098, optim0_lr0=1.501e-04, train_time=1.205 +[gpub010:0/16] 2024-02-12 03:10:38,158 (trainer:762) INFO: 45epoch:train:6401-6500batch: iter_time=8.220e-05, forward_time=0.290, loss_ctc=46.049, loss_att=39.605, acc=0.785, loss=41.538, backward_time=0.296, grad_norm=61.644, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.500e-04, train_time=1.235 +[gpub010:0/16] 2024-02-12 03:12:58,182 (trainer:762) INFO: 45epoch:train:6501-6600batch: iter_time=9.014e-05, forward_time=0.295, loss_ctc=42.878, loss_att=38.479, acc=0.784, loss=39.799, backward_time=0.299, grad_norm=40.260, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.500e-04, train_time=1.400 +[gpub010:0/16] 2024-02-12 03:15:12,351 (trainer:762) INFO: 45epoch:train:6601-6700batch: iter_time=9.129e-05, forward_time=0.343, loss_ctc=41.141, loss_att=41.423, acc=0.778, loss=41.338, backward_time=0.318, grad_norm=40.224, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.500e-04, train_time=1.342 +[gpub010:0/16] 2024-02-12 03:17:06,135 (trainer:762) INFO: 45epoch:train:6701-6800batch: iter_time=9.508e-05, forward_time=0.337, loss_ctc=46.509, loss_att=48.883, acc=0.786, loss=48.170, backward_time=0.306, grad_norm=41.011, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.500e-04, train_time=1.137 +[gpub010:0/16] 2024-02-12 03:19:42,989 (trainer:762) INFO: 
45epoch:train:6801-6900batch: iter_time=9.225e-05, forward_time=0.298, loss_ctc=57.698, loss_att=47.979, acc=0.767, loss=50.895, backward_time=0.307, grad_norm=50.647, clip=100.000, loss_scale=6.542e+33, optim_step_time=0.094, optim0_lr0=1.500e-04, train_time=1.568 +[gpub010:0/16] 2024-02-12 03:21:44,423 (trainer:762) INFO: 45epoch:train:6901-7000batch: iter_time=9.352e-05, forward_time=0.348, loss_ctc=58.003, loss_att=46.978, acc=0.777, loss=50.285, backward_time=0.333, grad_norm=51.568, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.500e-04, train_time=1.215 +[gpub010:0/16] 2024-02-12 03:23:46,021 (trainer:762) INFO: 45epoch:train:7001-7100batch: iter_time=8.826e-05, forward_time=0.291, loss_ctc=45.457, loss_att=44.914, acc=0.756, loss=45.077, backward_time=0.297, grad_norm=44.693, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.500e-04, train_time=1.215 +[gpub010:0/16] 2024-02-12 03:25:56,755 (trainer:762) INFO: 45epoch:train:7101-7200batch: iter_time=8.404e-05, forward_time=0.295, loss_ctc=39.979, loss_att=38.677, acc=0.783, loss=39.067, backward_time=0.301, grad_norm=37.970, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.500e-04, train_time=1.307 +[gpub010:0/16] 2024-02-12 03:27:58,759 (trainer:762) INFO: 45epoch:train:7201-7300batch: iter_time=8.761e-05, forward_time=0.394, loss_ctc=42.135, loss_att=50.604, acc=0.761, loss=48.064, backward_time=0.307, grad_norm=41.956, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.500e-04, train_time=1.220 +[gpub010:0/16] 2024-02-12 03:30:17,118 (trainer:762) INFO: 45epoch:train:7301-7400batch: iter_time=8.176e-05, forward_time=0.385, loss_ctc=43.875, loss_att=40.204, acc=0.775, loss=41.305, backward_time=0.325, grad_norm=40.959, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.099, optim0_lr0=1.499e-04, train_time=1.383 +[gpub010:0/16] 2024-02-12 03:31:26,202 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub010:0/16] 2024-02-12 03:32:19,378 (trainer:762) INFO: 45epoch:train:7401-7500batch: iter_time=8.081e-05, forward_time=0.316, loss_ctc=46.583, loss_att=47.023, acc=0.750, loss=46.891, backward_time=0.297, grad_norm=53.488, clip=100.000, loss_scale=7.920e+33, optim_step_time=0.093, optim0_lr0=1.499e-04, train_time=1.222 +[gpub010:0/16] 2024-02-12 03:32:39,405 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub010:0/16] 2024-02-12 03:32:59,061 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 03:33:02,528 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 03:33:02,528 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub010:0/16] 2024-02-12 03:33:02,532 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 03:39:06,453 (trainer:762) INFO: 45epoch:train:7501-7600batch: iter_time=2.685, forward_time=0.295, loss_ctc=44.453, loss_att=46.779, acc=0.756, loss=46.081, backward_time=0.301, grad_norm=42.706, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.499e-04, train_time=4.070 +[gpub010:0/16] 2024-02-12 03:41:07,561 (trainer:762) INFO: 45epoch:train:7601-7700batch: iter_time=7.896e-05, forward_time=0.395, loss_ctc=41.892, loss_att=38.991, acc=0.776, loss=39.861, backward_time=0.310, grad_norm=40.024, clip=100.000, 
loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.499e-04, train_time=1.211 +[gpub010:0/16] 2024-02-12 03:43:10,533 (trainer:762) INFO: 45epoch:train:7701-7800batch: iter_time=8.248e-05, forward_time=0.295, loss_ctc=48.246, loss_att=44.687, acc=0.759, loss=45.755, backward_time=0.304, grad_norm=44.018, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.499e-04, train_time=1.230 +[gpub010:0/16] 2024-02-12 03:45:32,836 (trainer:762) INFO: 45epoch:train:7801-7900batch: iter_time=4.234e-04, forward_time=0.368, loss_ctc=36.236, loss_att=32.597, acc=0.788, loss=33.689, backward_time=0.308, grad_norm=36.871, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.499e-04, train_time=1.423 +[gpub010:0/16] 2024-02-12 03:47:40,583 (trainer:762) INFO: 45epoch:train:7901-8000batch: iter_time=8.598e-05, forward_time=0.295, loss_ctc=46.828, loss_att=46.379, acc=0.787, loss=46.514, backward_time=0.299, grad_norm=41.492, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.499e-04, train_time=1.277 +[gpub010:0/16] 2024-02-12 03:49:30,199 (trainer:762) INFO: 45epoch:train:8001-8100batch: iter_time=8.353e-05, forward_time=0.297, loss_ctc=59.193, loss_att=49.914, acc=0.763, loss=52.698, backward_time=0.307, grad_norm=54.663, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.499e-04, train_time=1.096 +[gpub010:0/16] 2024-02-12 03:51:56,831 (trainer:762) INFO: 45epoch:train:8101-8200batch: iter_time=1.910e-04, forward_time=0.378, loss_ctc=50.856, loss_att=45.275, acc=0.771, loss=46.949, backward_time=0.305, grad_norm=44.831, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.499e-04, train_time=1.466 +[gpub010:0/16] 2024-02-12 03:54:10,482 (trainer:762) INFO: 45epoch:train:8201-8300batch: iter_time=8.386e-05, forward_time=0.298, loss_ctc=49.806, loss_att=41.642, acc=0.769, loss=44.091, backward_time=0.305, grad_norm=42.964, clip=100.000, loss_scale=5.192e+33, 
optim_step_time=0.094, optim0_lr0=1.498e-04, train_time=1.336 +[gpub010:0/16] 2024-02-12 03:56:06,724 (trainer:762) INFO: 45epoch:train:8301-8400batch: iter_time=8.366e-05, forward_time=0.318, loss_ctc=43.266, loss_att=43.805, acc=0.752, loss=43.643, backward_time=0.333, grad_norm=44.671, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.498e-04, train_time=1.163 +[gpub010:0/16] 2024-02-12 03:58:33,088 (trainer:762) INFO: 45epoch:train:8401-8500batch: iter_time=4.734e-04, forward_time=0.355, loss_ctc=43.401, loss_att=45.480, acc=0.771, loss=44.856, backward_time=0.313, grad_norm=39.812, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.498e-04, train_time=1.463 +[gpub010:0/16] 2024-02-12 04:00:37,449 (trainer:762) INFO: 45epoch:train:8501-8600batch: iter_time=8.098e-05, forward_time=0.295, loss_ctc=39.562, loss_att=42.600, acc=0.759, loss=41.688, backward_time=0.301, grad_norm=41.459, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.498e-04, train_time=1.244 +[gpub010:0/16] 2024-02-12 04:02:58,316 (trainer:762) INFO: 45epoch:train:8601-8700batch: iter_time=1.590e-04, forward_time=0.421, loss_ctc=49.072, loss_att=45.664, acc=0.748, loss=46.686, backward_time=0.307, grad_norm=53.244, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.498e-04, train_time=1.408 +[gpub010:0/16] 2024-02-12 04:04:12,325 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub010:0/16] 2024-02-12 04:04:31,635 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 04:04:35,221 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 04:04:35,221 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub010:0/16] 2024-02-12 04:04:35,227 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 04:10:10,338 (trainer:762) INFO: 45epoch:train:8701-8800batch: iter_time=3.138, forward_time=0.294, loss_ctc=41.481, loss_att=45.336, acc=0.769, loss=44.179, backward_time=0.304, grad_norm=44.443, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.498e-04, train_time=4.321 +[gpub010:0/16] 2024-02-12 04:12:31,647 (trainer:762) INFO: 45epoch:train:8801-8900batch: iter_time=7.367e-05, forward_time=0.292, loss_ctc=43.974, loss_att=44.971, acc=0.769, loss=44.672, backward_time=0.296, grad_norm=42.296, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.498e-04, train_time=1.413 +[gpub010:0/16] 2024-02-12 04:14:32,794 (trainer:762) INFO: 45epoch:train:8901-9000batch: iter_time=2.431e-04, forward_time=0.386, loss_ctc=45.660, loss_att=39.165, acc=0.786, loss=41.113, backward_time=0.316, grad_norm=41.486, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.498e-04, train_time=1.210 +[gpub010:0/16] 2024-02-12 04:16:35,195 (trainer:762) INFO: 
45epoch:train:9001-9100batch: iter_time=8.290e-05, forward_time=0.297, loss_ctc=42.757, loss_att=38.221, acc=0.786, loss=39.582, backward_time=0.300, grad_norm=42.274, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.498e-04, train_time=1.224 +[gpub010:0/16] 2024-02-12 04:19:01,487 (trainer:762) INFO: 45epoch:train:9101-9200batch: iter_time=8.178e-05, forward_time=0.321, loss_ctc=40.908, loss_att=41.421, acc=0.780, loss=41.267, backward_time=0.352, grad_norm=40.717, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.497e-04, train_time=1.464 +[gpub010:0/16] 2024-02-12 04:20:56,670 (trainer:762) INFO: 45epoch:train:9201-9300batch: iter_time=8.080e-05, forward_time=0.298, loss_ctc=46.506, loss_att=49.250, acc=0.787, loss=48.427, backward_time=0.304, grad_norm=38.892, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.497e-04, train_time=1.151 +[gpub010:0/16] 2024-02-12 04:23:15,804 (trainer:762) INFO: 45epoch:train:9301-9400batch: iter_time=1.398e-04, forward_time=0.329, loss_ctc=57.662, loss_att=47.835, acc=0.767, loss=50.783, backward_time=0.342, grad_norm=50.669, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.497e-04, train_time=1.391 +[gpub010:0/16] 2024-02-12 04:25:22,248 (trainer:762) INFO: 45epoch:train:9401-9500batch: iter_time=8.094e-05, forward_time=0.295, loss_ctc=57.482, loss_att=46.739, acc=0.779, loss=49.962, backward_time=0.300, grad_norm=48.031, clip=100.000, loss_scale=7.633e+33, optim_step_time=0.093, optim0_lr0=1.497e-04, train_time=1.264 +[gpub010:0/16] 2024-02-12 04:27:38,226 (trainer:762) INFO: 45epoch:train:9501-9600batch: iter_time=8.205e-05, forward_time=0.355, loss_ctc=44.597, loss_att=44.008, acc=0.756, loss=44.184, backward_time=0.351, grad_norm=42.529, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.497e-04, train_time=1.360 +[gpub010:0/16] 2024-02-12 04:29:42,494 (trainer:762) INFO: 45epoch:train:9601-9700batch: 
iter_time=8.247e-05, forward_time=0.289, loss_ctc=39.998, loss_att=38.536, acc=0.783, loss=38.975, backward_time=0.295, grad_norm=39.547, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.497e-04, train_time=1.242 +[gpub010:0/16] 2024-02-12 04:31:59,698 (trainer:762) INFO: 45epoch:train:9701-9800batch: iter_time=8.262e-05, forward_time=0.297, loss_ctc=41.710, loss_att=50.194, acc=0.762, loss=47.649, backward_time=0.305, grad_norm=42.940, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.497e-04, train_time=1.372 +[gpub010:0/16] 2024-02-12 04:34:02,628 (trainer:762) INFO: 45epoch:train:9801-9900batch: iter_time=2.855e-04, forward_time=0.395, loss_ctc=43.258, loss_att=39.635, acc=0.778, loss=40.722, backward_time=0.318, grad_norm=41.369, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.497e-04, train_time=1.229 +[gpub010:0/16] 2024-02-12 04:35:56,724 (trainer:762) INFO: 45epoch:train:9901-10000batch: iter_time=7.898e-05, forward_time=0.293, loss_ctc=46.100, loss_att=45.426, acc=0.751, loss=45.628, backward_time=0.298, grad_norm=60.826, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.497e-04, train_time=1.141 +[gpub010:0/16] 2024-02-12 04:36:16,794 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub010:0/16] 2024-02-12 04:36:36,442 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 04:36:39,976 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 04:36:39,976 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub010:0/16] 2024-02-12 04:36:39,981 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 04:43:29,780 (trainer:762) INFO: 45epoch:train:10001-10100batch: iter_time=3.182, forward_time=0.395, loss_ctc=44.188, loss_att=46.926, acc=0.757, loss=46.105, backward_time=0.315, grad_norm=43.552, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.496e-04, train_time=4.530 +[gpub010:0/16] 2024-02-12 04:45:22,590 (trainer:762) INFO: 45epoch:train:10101-10200batch: iter_time=8.230e-05, forward_time=0.290, loss_ctc=41.208, loss_att=38.961, acc=0.776, loss=39.635, backward_time=0.295, grad_norm=41.383, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.496e-04, train_time=1.128 +[gpub010:0/16] 2024-02-12 04:47:42,921 (trainer:762) INFO: 45epoch:train:10201-10300batch: iter_time=8.219e-05, forward_time=0.299, loss_ctc=48.102, loss_att=45.110, acc=0.756, loss=46.008, backward_time=0.304, grad_norm=42.688, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.095, optim0_lr0=1.496e-04, train_time=1.403 +[gpub010:0/16] 2024-02-12 04:49:50,132 (trainer:762) 
INFO: 45epoch:train:10301-10400batch: iter_time=8.410e-05, forward_time=0.366, loss_ctc=35.840, loss_att=32.405, acc=0.790, loss=33.436, backward_time=0.302, grad_norm=38.203, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.127, optim0_lr0=1.496e-04, train_time=1.272 +[gpub010:0/16] 2024-02-12 04:51:46,902 (trainer:762) INFO: 45epoch:train:10401-10500batch: iter_time=8.295e-05, forward_time=0.293, loss_ctc=46.768, loss_att=46.340, acc=0.787, loss=46.468, backward_time=0.299, grad_norm=41.969, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.496e-04, train_time=1.168 +[gpub010:0/16] 2024-02-12 04:54:16,668 (trainer:762) INFO: 45epoch:train:10501-10600batch: iter_time=8.207e-05, forward_time=0.413, loss_ctc=58.667, loss_att=50.090, acc=0.763, loss=52.663, backward_time=0.326, grad_norm=49.831, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.097, optim0_lr0=1.496e-04, train_time=1.497 +[gpub010:0/16] 2024-02-12 04:56:15,404 (trainer:762) INFO: 45epoch:train:10601-10700batch: iter_time=7.990e-05, forward_time=0.293, loss_ctc=50.221, loss_att=45.508, acc=0.771, loss=46.922, backward_time=0.298, grad_norm=44.310, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.496e-04, train_time=1.187 +[gpub010:0/16] 2024-02-12 04:58:22,656 (trainer:762) INFO: 45epoch:train:10701-10800batch: iter_time=7.994e-05, forward_time=0.292, loss_ctc=49.347, loss_att=41.526, acc=0.769, loss=43.872, backward_time=0.297, grad_norm=44.498, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.496e-04, train_time=1.272 +[gpub010:0/16] 2024-02-12 05:00:39,312 (trainer:762) INFO: 45epoch:train:10801-10900batch: iter_time=8.086e-05, forward_time=0.373, loss_ctc=42.852, loss_att=42.567, acc=0.757, loss=42.652, backward_time=0.313, grad_norm=42.444, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.104, optim0_lr0=1.496e-04, train_time=1.366 +[gpub010:0/16] 2024-02-12 05:02:39,259 (trainer:762) INFO: 
45epoch:train:10901-11000batch: iter_time=8.017e-05, forward_time=0.291, loss_ctc=43.513, loss_att=45.559, acc=0.771, loss=44.945, backward_time=0.297, grad_norm=41.297, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.495e-04, train_time=1.200 +[gpub010:0/16] 2024-02-12 05:05:11,880 (trainer:762) INFO: 45epoch:train:11001-11100batch: iter_time=8.317e-05, forward_time=0.388, loss_ctc=39.618, loss_att=42.671, acc=0.759, loss=41.755, backward_time=0.309, grad_norm=40.005, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.121, optim0_lr0=1.495e-04, train_time=1.526 +[gpub010:0/16] 2024-02-12 05:07:07,432 (trainer:762) INFO: 45epoch:train:11101-11200batch: iter_time=8.189e-05, forward_time=0.293, loss_ctc=48.708, loss_att=45.579, acc=0.746, loss=46.518, backward_time=0.298, grad_norm=51.810, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.495e-04, train_time=1.155 +[gpub010:0/16] 2024-02-12 05:08:23,668 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub010:0/16] 2024-02-12 05:08:42,935 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 05:08:46,543 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 05:08:46,543 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub010:0/16] 2024-02-12 05:08:46,592 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 05:14:22,409 (trainer:762) INFO: 45epoch:train:11201-11300batch: iter_time=3.097, forward_time=0.291, loss_ctc=41.152, loss_att=44.999, acc=0.773, loss=43.845, backward_time=0.330, grad_norm=43.553, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.098, optim0_lr0=1.495e-04, train_time=4.350 +[gpub010:0/16] 2024-02-12 05:16:17,945 (trainer:762) INFO: 45epoch:train:11301-11400batch: iter_time=7.961e-05, forward_time=0.342, loss_ctc=43.688, loss_att=44.400, acc=0.772, loss=44.186, backward_time=0.303, grad_norm=40.601, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.495e-04, train_time=1.155 +[gpub010:0/16] 2024-02-12 05:18:26,178 (trainer:762) INFO: 45epoch:train:11401-11500batch: iter_time=7.933e-05, forward_time=0.292, loss_ctc=45.695, loss_att=39.715, acc=0.784, loss=41.509, backward_time=0.296, grad_norm=43.768, clip=100.000, loss_scale=1.527e+34, optim_step_time=0.093, optim0_lr0=1.495e-04, train_time=1.282 +[gpub010:0/16] 2024-02-12 05:19:07,259 
(trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-12 05:20:50,259 (trainer:762) INFO: 45epoch:train:11501-11600batch: iter_time=8.375e-05, forward_time=0.395, loss_ctc=42.429, loss_att=37.675, acc=0.788, loss=39.101, backward_time=0.334, grad_norm=38.844, clip=100.000, loss_scale=1.227e+34, optim_step_time=0.098, optim0_lr0=1.495e-04, train_time=1.441 +[gpub010:0/16] 2024-02-12 05:22:47,525 (trainer:762) INFO: 45epoch:train:11601-11700batch: iter_time=8.243e-05, forward_time=0.291, loss_ctc=41.122, loss_att=41.647, acc=0.779, loss=41.489, backward_time=0.296, grad_norm=40.200, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.495e-04, train_time=1.173 +[gpub010:0/16] 2024-02-12 05:25:21,380 (trainer:762) INFO: 45epoch:train:11701-11800batch: iter_time=8.552e-05, forward_time=0.331, loss_ctc=46.821, loss_att=48.236, acc=0.788, loss=47.811, backward_time=0.308, grad_norm=40.173, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.106, optim0_lr0=1.495e-04, train_time=1.538 +[gpub010:0/16] 2024-02-12 05:27:27,972 (trainer:762) INFO: 45epoch:train:11801-11900batch: iter_time=8.550e-05, forward_time=0.340, loss_ctc=57.414, loss_att=47.912, acc=0.768, loss=50.762, backward_time=0.308, grad_norm=47.521, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.094, optim0_lr0=1.494e-04, train_time=1.266 +[gpub010:0/16] 2024-02-12 05:29:37,212 (trainer:762) INFO: 45epoch:train:11901-12000batch: iter_time=8.168e-05, forward_time=0.295, loss_ctc=57.579, loss_att=47.109, acc=0.777, loss=50.250, backward_time=0.301, grad_norm=47.345, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.494e-04, train_time=1.292 +[gpub010:0/16] 2024-02-12 05:32:01,793 (trainer:762) INFO: 45epoch:train:12001-12100batch: iter_time=9.433e-05, forward_time=0.403, loss_ctc=44.864, loss_att=44.261, acc=0.754, loss=44.441, backward_time=0.325, grad_norm=43.116, clip=100.000, loss_scale=1.038e+34, 
optim_step_time=0.096, optim0_lr0=1.494e-04, train_time=1.446 +[gpub010:0/16] 2024-02-12 05:33:53,178 (trainer:762) INFO: 45epoch:train:12101-12200batch: iter_time=8.586e-05, forward_time=0.291, loss_ctc=39.958, loss_att=38.934, acc=0.783, loss=39.241, backward_time=0.297, grad_norm=38.948, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.494e-04, train_time=1.113 +[gpub010:0/16] 2024-02-12 05:36:09,378 (trainer:762) INFO: 45epoch:train:12201-12300batch: iter_time=8.327e-05, forward_time=0.292, loss_ctc=41.876, loss_att=50.556, acc=0.762, loss=47.952, backward_time=0.297, grad_norm=42.614, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.093, optim0_lr0=1.494e-04, train_time=1.362 +[gpub010:0/16] 2024-02-12 05:38:29,648 (trainer:762) INFO: 45epoch:train:12301-12400batch: iter_time=8.122e-05, forward_time=0.371, loss_ctc=42.631, loss_att=40.140, acc=0.775, loss=40.888, backward_time=0.325, grad_norm=40.617, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.100, optim0_lr0=1.494e-04, train_time=1.402 +[gpub010:0/16] 2024-02-12 05:38:46,194 (trainer:693) WARNING: The grad norm is nan. Skipping updating the model. +[gpub010:0/16] 2024-02-12 05:40:30,875 (trainer:762) INFO: 45epoch:train:12401-12500batch: iter_time=8.061e-05, forward_time=0.292, loss_ctc=46.425, loss_att=45.914, acc=0.754, loss=46.067, backward_time=0.297, grad_norm=54.397, clip=100.000, loss_scale=5.769e+33, optim_step_time=0.093, optim0_lr0=1.494e-04, train_time=1.212 +[gpub010:0/16] 2024-02-12 05:40:50,904 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub010:0/16] 2024-02-12 05:41:10,481 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 05:41:14,090 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 05:41:14,090 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub010:0/16] 2024-02-12 05:41:14,093 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 05:47:51,983 (trainer:762) INFO: 45epoch:train:12501-12600batch: iter_time=3.123, forward_time=0.348, loss_ctc=43.642, loss_att=46.387, acc=0.758, loss=45.563, backward_time=0.312, grad_norm=43.506, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.494e-04, train_time=4.411 +[gpub010:0/16] 2024-02-12 05:50:01,581 (trainer:762) INFO: 45epoch:train:12601-12700batch: iter_time=8.391e-05, forward_time=0.334, loss_ctc=41.529, loss_att=38.979, acc=0.776, loss=39.744, backward_time=0.299, grad_norm=42.564, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.494e-04, train_time=1.296 +[gpub010:0/16] 2024-02-12 05:52:15,918 (trainer:762) INFO: 45epoch:train:12701-12800batch: iter_time=8.069e-05, forward_time=0.292, loss_ctc=47.600, loss_att=44.513, acc=0.759, loss=45.439, backward_time=0.297, grad_norm=43.597, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.493e-04, train_time=1.343 +[gpub010:0/16] 2024-02-12 05:54:19,321 
(trainer:762) INFO: 45epoch:train:12801-12900batch: iter_time=8.528e-05, forward_time=0.330, loss_ctc=35.876, loss_att=32.332, acc=0.790, loss=33.395, backward_time=0.303, grad_norm=36.259, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.102, optim0_lr0=1.493e-04, train_time=1.234 +[gpub010:0/16] 2024-02-12 05:56:30,847 (trainer:762) INFO: 45epoch:train:12901-13000batch: iter_time=3.633e-04, forward_time=0.338, loss_ctc=46.780, loss_att=46.185, acc=0.787, loss=46.363, backward_time=0.304, grad_norm=40.795, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.493e-04, train_time=1.315 +[gpub010:0/16] 2024-02-12 05:58:49,987 (trainer:762) INFO: 45epoch:train:13001-13100batch: iter_time=8.637e-05, forward_time=0.347, loss_ctc=58.710, loss_att=49.968, acc=0.764, loss=52.591, backward_time=0.312, grad_norm=47.039, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.493e-04, train_time=1.391 +[gpub010:0/16] 2024-02-12 06:01:00,452 (trainer:762) INFO: 45epoch:train:13101-13200batch: iter_time=8.214e-05, forward_time=0.350, loss_ctc=50.462, loss_att=45.540, acc=0.771, loss=47.016, backward_time=0.308, grad_norm=45.662, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.493e-04, train_time=1.305 +[gpub010:0/16] 2024-02-12 06:03:19,804 (trainer:762) INFO: 45epoch:train:13201-13300batch: iter_time=8.480e-05, forward_time=0.291, loss_ctc=50.045, loss_att=41.988, acc=0.768, loss=44.405, backward_time=0.297, grad_norm=44.962, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.493e-04, train_time=1.393 +[gpub010:0/16] 2024-02-12 06:05:32,838 (trainer:762) INFO: 45epoch:train:13301-13400batch: iter_time=8.491e-05, forward_time=0.351, loss_ctc=43.071, loss_att=42.682, acc=0.758, loss=42.799, backward_time=0.309, grad_norm=43.911, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.493e-04, train_time=1.330 +[gpub010:0/16] 2024-02-12 06:07:43,012 (trainer:762) INFO: 
45epoch:train:13401-13500batch: iter_time=2.334e-04, forward_time=0.344, loss_ctc=43.105, loss_att=45.103, acc=0.773, loss=44.504, backward_time=0.302, grad_norm=38.807, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.493e-04, train_time=1.302 +[gpub010:0/16] 2024-02-12 06:10:03,286 (trainer:762) INFO: 45epoch:train:13501-13600batch: iter_time=8.416e-05, forward_time=0.352, loss_ctc=38.843, loss_att=42.438, acc=0.762, loss=41.359, backward_time=0.301, grad_norm=40.501, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.493e-04, train_time=1.402 +[gpub010:0/16] 2024-02-12 06:12:01,094 (trainer:762) INFO: 45epoch:train:13601-13700batch: iter_time=1.550e-04, forward_time=0.375, loss_ctc=47.941, loss_att=44.988, acc=0.747, loss=45.874, backward_time=0.304, grad_norm=51.786, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.492e-04, train_time=1.178 +[gpub010:0/16] 2024-02-12 06:13:32,068 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub010:0/16] 2024-02-12 06:13:51,489 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub010:0/16] 2024-02-12 06:13:54,973 (abs_task:1663) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub010:0/16] 2024-02-12 06:13:54,973 (abs_task:1664) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub010:0/16] 2024-02-12 06:13:55,029 (abs_task:1665) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub010:0/16] 2024-02-12 06:19:25,843 (trainer:762) INFO: 45epoch:train:13701-13800batch: iter_time=3.055, forward_time=0.324, loss_ctc=41.222, loss_att=45.338, acc=0.772, loss=44.103, backward_time=0.301, grad_norm=45.596, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.492e-04, train_time=4.447 +[gpub010:0/16] 2024-02-12 06:21:19,568 (trainer:762) INFO: 45epoch:train:13801-13900batch: iter_time=7.946e-05, forward_time=0.342, loss_ctc=43.645, loss_att=44.179, acc=0.773, loss=44.019, backward_time=0.303, grad_norm=40.311, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.094, optim0_lr0=1.492e-04, train_time=1.137 +[gpub010:0/16] 2024-02-12 06:23:41,287 (trainer:762) INFO: 45epoch:train:13901-14000batch: iter_time=8.008e-05, forward_time=0.319, loss_ctc=45.620, loss_att=39.810, acc=0.783, loss=41.553, backward_time=0.322, grad_norm=41.655, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.492e-04, train_time=1.417 +[gpub010:0/16] 2024-02-12 06:25:35,826 (trainer:762) 
INFO: 45epoch:train:14001-14100batch: iter_time=8.331e-05, forward_time=0.292, loss_ctc=42.676, loss_att=38.137, acc=0.788, loss=39.499, backward_time=0.298, grad_norm=39.276, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.492e-04, train_time=1.145 +[gpub010:0/16] 2024-02-12 06:27:28,874 (trainer:762) INFO: 45epoch:train:14101-14200batch: iter_time=8.667e-05, forward_time=0.320, loss_ctc=40.883, loss_att=41.290, acc=0.781, loss=41.168, backward_time=0.319, grad_norm=39.384, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.095, optim0_lr0=1.492e-04, train_time=1.130 +[gpub010:0/16] 2024-02-12 06:30:09,375 (trainer:762) INFO: 45epoch:train:14201-14300batch: iter_time=9.141e-05, forward_time=0.349, loss_ctc=46.300, loss_att=48.069, acc=0.789, loss=47.538, backward_time=0.318, grad_norm=40.512, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.492e-04, train_time=1.605 +[gpub010:0/16] 2024-02-12 06:32:03,860 (trainer:762) INFO: 45epoch:train:14301-14400batch: iter_time=8.755e-05, forward_time=0.344, loss_ctc=57.358, loss_att=47.813, acc=0.768, loss=50.677, backward_time=0.308, grad_norm=51.025, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.492e-04, train_time=1.145 +[gpub010:0/16] 2024-02-12 06:34:21,430 (trainer:762) INFO: 45epoch:train:14401-14500batch: iter_time=8.070e-05, forward_time=0.296, loss_ctc=57.971, loss_att=47.067, acc=0.776, loss=50.338, backward_time=0.301, grad_norm=49.884, clip=100.000, loss_scale=9.762e+33, optim_step_time=0.093, optim0_lr0=1.492e-04, train_time=1.375 +[gpub010:0/16] 2024-02-12 06:36:32,335 (trainer:762) INFO: 45epoch:train:14501-14600batch: iter_time=8.012e-05, forward_time=0.327, loss_ctc=44.461, loss_att=44.121, acc=0.757, loss=44.223, backward_time=0.320, grad_norm=45.405, clip=100.000, loss_scale=1.038e+34, optim_step_time=0.096, optim0_lr0=1.491e-04, train_time=1.309 +[gpub010:0/16] 2024-02-12 06:36:42,092 (trainer:693) WARNING: The grad norm is nan. 
Skipping updating the model. +[gpub010:0/16] 2024-02-12 06:38:29,059 (trainer:762) INFO: 45epoch:train:14601-14700batch: iter_time=2.559e-04, forward_time=0.350, loss_ctc=39.460, loss_att=38.487, acc=0.785, loss=38.779, backward_time=0.303, grad_norm=38.826, clip=100.000, loss_scale=5.612e+33, optim_step_time=0.093, optim0_lr0=1.491e-04, train_time=1.168 +[gpub010:0/16] 2024-02-12 06:40:41,551 (trainer:762) INFO: 45epoch:train:14701-14800batch: iter_time=8.601e-05, forward_time=0.291, loss_ctc=41.600, loss_att=50.541, acc=0.763, loss=47.859, backward_time=0.296, grad_norm=41.954, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.093, optim0_lr0=1.491e-04, train_time=1.325 +[gpub010:0/16] 2024-02-12 06:43:00,382 (trainer:762) INFO: 45epoch:train:14801-14900batch: iter_time=8.265e-05, forward_time=0.333, loss_ctc=42.811, loss_att=40.067, acc=0.775, loss=40.890, backward_time=0.322, grad_norm=42.227, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.097, optim0_lr0=1.491e-04, train_time=1.388 +[gpub010:0/16] 2024-02-12 06:45:08,079 (trainer:762) INFO: 45epoch:train:14901-15000batch: iter_time=8.281e-05, forward_time=0.341, loss_ctc=45.669, loss_att=45.008, acc=0.757, loss=45.207, backward_time=0.308, grad_norm=52.642, clip=100.000, loss_scale=5.192e+33, optim_step_time=0.096, optim0_lr0=1.491e-04, train_time=1.277 +[gpub010:0/16] 2024-02-12 07:22:53,575 (trainer:361) INFO: 45epoch results: [train] iter_time=0.249, forward_time=0.326, loss_ctc=46.133, loss_att=43.984, acc=0.769, loss=44.629, backward_time=0.309, grad_norm=44.287, clip=100.000, loss_scale=7.252e+33, optim_step_time=0.096, optim0_lr0=1.499e-04, train_time=1.552, time=6 hours, 28 minutes and 31.74 seconds, total_count=705000, gpu_max_cached_mem_GB=42.092, [valid] loss_ctc=33.517, cer_ctc=0.172, loss_att=37.876, acc=0.692, cer=0.310, wer=0.991, loss=36.569, time=37 minutes and 19.84 seconds, total_count=219537, gpu_max_cached_mem_GB=42.092 +gpub037:2479731:2479788 [0] NCCL INFO [Service thread] 
Connection closed by localRank 0 +gpub037:2479732:2479785 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub037:2479733:2479786 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub076:3268962:3269017 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub037:2479733:2479733 [2] NCCL INFO comm 0x55c8aa09ba70 rank 6 nranks 16 cudaDev 2 busId 85000 - Abort COMPLETE +gpub076:3268963:3269016 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub076:3268960:3269018 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub037:2479734:2479787 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub010:2415461:2415514 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub010:2415460:2415516 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub076:3268962:3268962 [2] NCCL INFO comm 0x561f7f19bb70 rank 10 nranks 16 cudaDev 2 busId 85000 - Abort COMPLETE +gpub076:3268961:3269019 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub010:2415460:2415460 [2] NCCL INFO comm 0x560007013060 rank 2 nranks 16 cudaDev 2 busId 85000 - Abort COMPLETE +gpub010:2415459:2415517 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub010:2415459:2415459 [1] NCCL INFO comm 0x5576d44db4b0 rank 1 nranks 16 cudaDev 1 busId 46000 - Abort COMPLETE +gpub082:3674493:3674551 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub082:3674496:3674549 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub082:3674494:3674550 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub082:3674495:3674552 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub082:3674494:3674494 [1] NCCL INFO comm 0x5605a17f6130 rank 13 nranks 16 cudaDev 1 busId 46000 - Abort COMPLETE +gpub082:3674495:3674495 [2] NCCL INFO comm 0x556db2db58a0 rank 14 nranks 16 cudaDev 2 busId 85000 - Abort COMPLETE +gpub076:3268963:3268963 
[3] NCCL INFO comm 0x56218608af50 rank 11 nranks 16 cudaDev 3 busId c7000 - Abort COMPLETE +gpub076:3268961:3268961 [1] NCCL INFO comm 0x558968564390 rank 9 nranks 16 cudaDev 1 busId 46000 - Abort COMPLETE +gpub076:3268960:3268960 [0] NCCL INFO comm 0x55f0b7620d20 rank 8 nranks 16 cudaDev 0 busId 7000 - Abort COMPLETE +gpub010:2415461:2415461 [3] NCCL INFO comm 0x56066bfecd40 rank 3 nranks 16 cudaDev 3 busId c7000 - Abort COMPLETE +gpub082:3674496:3674496 [3] NCCL INFO comm 0x55cb4fbfa630 rank 15 nranks 16 cudaDev 3 busId c7000 - Abort COMPLETE +gpub037:2479731:2479731 [0] NCCL INFO comm 0x556289adbe40 rank 4 nranks 16 cudaDev 0 busId 7000 - Abort COMPLETE +gpub037:2479734:2479734 [3] NCCL INFO comm 0x560fe900a3e0 rank 7 nranks 16 cudaDev 3 busId c7000 - Abort COMPLETE +gpub082:3674493:3674493 [0] NCCL INFO comm 0x5595c37939f0 rank 12 nranks 16 cudaDev 0 busId 7000 - Abort COMPLETE +gpub037:2479732:2479732 [1] NCCL INFO comm 0x5641d0bbff50 rank 5 nranks 16 cudaDev 1 busId 46000 - Abort COMPLETE +[gpub010:0/16] 2024-02-12 07:23:02,819 (trainer:416) INFO: The best model has been updated: valid.total_count +[gpub010:0/16] 2024-02-12 07:23:02,932 (trainer:470) INFO: The model files were removed: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/40epoch.pth +[gpub010:0/16] 2024-02-12 07:23:02,932 (trainer:488) INFO: The training was finished at 45 epochs +[gpub010:0/16] 2024-02-12 07:23:02,959 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.acc.ave_5best.pth +[gpub010:0/16] 2024-02-12 07:23:21,911 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.pth +gpub010:2415458:2415515 [0] NCCL INFO [Service thread] Connection closed by localRank 0 
+gpub010:2415458:2415458 [0] NCCL INFO comm 0x5627bbb736f0 rank 0 nranks 16 cudaDev 0 busId 7000 - Abort COMPLETE +# Accounting: begin_time=1707587278 +# Accounting: end_time=1707744214 +# Accounting: time=156936 threads=1 +# Finished at Mon Feb 12 07:23:34 CST 2024 with status 0 diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.acc.ave_5best.pth b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.acc.ave_5best.pth new file mode 100644 index 0000000000000000000000000000000000000000..85f667630bbd7a8c101ee571d31f89335fa720f9 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.acc.ave_5best.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f15bc3dde14fa43e480e58e4f563bb6e1463bfd1f5960f1135ab21e007fbb11 +size 1466924749 diff --git a/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.pth b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.pth new file mode 100644 index 0000000000000000000000000000000000000000..81580fa6f8ea714ffb8b6968023efc51ce30f991 --- /dev/null +++ b/exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d51c19d07c251b1bfdea44a3c84a0d1a189a8e64a7bf8b364c955806ecb0247c +size 1466929645 diff --git a/meta.yaml b/meta.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f906ae310ec32d6d4a990b2b665e8807a5dccead --- /dev/null +++ b/meta.yaml @@ -0,0 +1,8 @@ +espnet: '202310' +files: + s2t_model_file: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/valid.acc.ave_5best.pth +python: 3.10.13 | packaged by conda-forge 
| (main, Dec 23 2023, 15:36:39) [GCC 12.3.0] +timestamp: 1725245981.515642 +torch: 1.13.1 +yaml_files: + s2t_train_config: exp/s2t_train_s2t_ebf_conv2d_size768_e9_d9_piecewise_lr5e-4_warmup60k_flashattn_raw_bpe50000/config.yaml